Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -35,6 +35,7 @@
 class ConstantRange;
 class DataLayout;
 class LLVMContext;
+template <bool SkipUnsplittable> class UnwindDestIterator;
 
 enum AtomicOrdering {
   NotAtomic = 0,
@@ -3710,6 +3711,16 @@
     Op<-1>() = reinterpret_cast<Value *>(B);
   }
 
+  /// getTransitiveUnwindDests - get an iterator that visits all EH pads
+  /// this invoke may reach by unwinding through its unwind edge and any
+  /// ensuing unsplittable blocks, optionally visiting the unsplittable
+  /// blocks themselves.
+  template <bool SkipUnsplittable = true>
+  iterator_range<UnwindDestIterator<SkipUnsplittable>>
+  getTransitiveUnwindDests() const {
+    return UnwindDestIterator<SkipUnsplittable>::range(getUnwindDest());
+  }
+
   /// getLandingPadInst - Get the landingpad instruction from the landing pad
   /// block (the unwind destination).
   LandingPadInst *getLandingPadInst() const;
@@ -3915,6 +3926,16 @@
     setOperand(1, UnwindDest);
   }
 
+  /// getTransitiveUnwindDests - get an iterator that visits all EH pads
+  /// this catchswitch may reach by unwinding through its unwind edge and any
+  /// ensuing unsplittable blocks, optionally visiting the unsplittable
+  /// blocks themselves.
+  template <bool SkipUnsplittable = true>
+  iterator_range<UnwindDestIterator<SkipUnsplittable>>
+  getTransitiveUnwindDests() const {
+    return UnwindDestIterator<SkipUnsplittable>::range(getUnwindDest());
+  }
+
   /// getNumHandlers - return the number of 'handlers' in this catchswitch
   /// instruction, except the default handler
   unsigned getNumHandlers() const {
@@ -4244,6 +4265,16 @@
     Op<1>() = NewDest;
   }
 
+  /// getTransitiveUnwindDests - get an iterator that visits all EH pads
+  /// this cleanupret may reach by unwinding through its unwind edge and any
+  /// ensuing unsplittable blocks, optionally visiting the unsplittable
+  /// blocks themselves.
+  template <bool SkipUnsplittable = true>
+  iterator_range<UnwindDestIterator<SkipUnsplittable>>
+  getTransitiveUnwindDests() const {
+    return UnwindDestIterator<SkipUnsplittable>::range(getUnwindDest());
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Instruction *I) {
     return (I->getOpcode() == Instruction::CleanupRet);
@@ -4822,6 +4853,102 @@
   }
 };
 
+/// Iterator for visiting all EH pads that an instruction with an unwind
+/// destination may reach by unwinding through its unwind edge and any
+/// ensuing unsplittable blocks, optionally visiting the unsplittable
+/// blocks themselves.  Destinations are visited in depth-first pre-order.
+template <bool SkipUnsplittable> class UnwindDestIterator {
+  /// Construct default iterator, suitable as end() for any iteration.
+  explicit UnwindDestIterator() {}
+
+  /// Construct iterator suitable as begin() for iterating transitive
+  /// unwind dests of an instruction whose immediate unwind dest is
+  /// the given BasicBlock.
+  explicit UnwindDestIterator(BasicBlock *UnwindDest) {
+    visitUnwindDest(UnwindDest);
+  }
+
+  /// CurrentInstr is the first non-PHI of the transitive unwind dest
+  /// at the current position.
+  Instruction *CurrentInstr = nullptr;
+
+  /// If currently visiting a catchpad, CurrentHandler is the handler
+  /// iterator on the parent catchswitch referencing that catchpad.
+  /// Otherwise, CurrentHandler is None.  When visiting a catchswitch
+  /// itself, CurrentInstr is the catchswitch and CurrentHandler is None.
+  llvm::Optional<CatchSwitchInst::handler_iterator> CurrentHandler;
+
+  /// Set the iterator position to the given unwind destination, skipping
+  /// over any unsplittable blocks if SkipUnsplittable is true.
+  /// \param UnwindDest New dest to visit.  May be \p nullptr to set
+  ///        iterator to end state.
+  void visitUnwindDest(BasicBlock *UnwindDest) {
+    assert(!CurrentHandler);
+    if (!UnwindDest) {
+      CurrentInstr = nullptr;
+      return;
+    }
+    CurrentInstr = UnwindDest->getFirstNonPHI();
+    if (SkipUnsplittable)
+      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentInstr))
+        visitFirstHandler(CatchSwitch);
+  }
+
+  /// Begin visiting the catchpads under a catchswitch.
+  void visitFirstHandler(CatchSwitchInst *CatchSwitch) {
+    CurrentHandler = CatchSwitch->handler_begin();
+    CurrentInstr = (**CurrentHandler)->getFirstNonPHI();
+  }
+
+public:
+  BasicBlock *operator*() { return CurrentInstr->getParent(); }
+
+  UnwindDestIterator &operator++() {
+    if (CurrentHandler) {
+      CatchSwitchInst *CatchSwitch =
+          cast<CatchPadInst>(CurrentInstr)->getCatchSwitch();
+      if (++(*CurrentHandler) != CatchSwitch->handler_end()) {
+        CurrentInstr = (**CurrentHandler)->getFirstNonPHI();
+        return *this;
+      }
+      CurrentHandler.reset();
+      visitUnwindDest(CatchSwitch->getUnwindDest());
+      return *this;
+    }
+    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentInstr)) {
+      visitFirstHandler(CatchSwitch);
+      return *this;
+    }
+    CurrentInstr = nullptr;
+    return *this;
+  }
+
+  bool operator==(const UnwindDestIterator &Other) const {
+    if (CurrentInstr != Other.CurrentInstr)
+      return false;
+    if (CurrentHandler.hasValue() != Other.CurrentHandler.hasValue())
+      return false;
+    if (CurrentHandler &&
+        (CurrentHandler.getValue() != Other.CurrentHandler.getValue()))
+      return false;
+    return true;
+  }
+
+  bool operator!=(const UnwindDestIterator &Other) const {
+    return !operator==(Other);
+  }
+
+  /// Get the iterator range for visiting transitive unwind destinations of an
+  /// instruction whose immediate unwind destination is \p UnwindDest, in
+  /// depth-first pre-order.
+  /// \param UnwindDest First unwind destination to visit.  May be null for
+  ///        empty iteration.
+  static iterator_range<UnwindDestIterator> range(BasicBlock *UnwindDest) {
+    return iterator_range<UnwindDestIterator>(UnwindDestIterator(UnwindDest),
+                                              UnwindDestIterator());
+  }
+};
+
 } // End llvm namespace
 
 #endif
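Usage note (not part of the patch): the accessor above gives clients a uniform way to walk every EH pad reachable from an unwind edge without hand-rolling the catchswitch recursion. A minimal sketch of a client loop, where `II` stands for some existing InvokeInst and the explicit `false` asks the iterator to also yield the unsplittable catchswitch blocks:

    // Visit every transitive unwind destination of the invoke, including
    // unsplittable catchswitch blocks, in depth-first pre-order.
    for (BasicBlock *Dest : II->getTransitiveUnwindDests<false>()) {
      Instruction *Pad = Dest->getFirstNonPHI();
      assert(Pad->isEHPad() && "iterator only yields EH pad blocks");
      // ... inspect or annotate Pad here ...
    }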
Index: include/llvm/IR/Statepoint.h
===================================================================
--- include/llvm/IR/Statepoint.h
+++ include/llvm/IR/Statepoint.h
@@ -330,6 +330,8 @@
     // This takes care both of relocates for call statepoints and relocates
     // on normal path of invoke statepoint.
     if (!isa<LandingPadInst>(Token)) {
+      assert(!cast<Instruction>(Token)->isEHPad() &&
+             "Funclet pads don't support 1:1 relocate:statepoint mapping");
       return cast<Instruction>(Token);
     }
 
@@ -390,13 +392,23 @@
       return Result;
 
     // We need to scan thorough exceptional relocations if it is invoke statepoint
-    LandingPadInst *LandingPad =
-        cast<InvokeInst>(getInstruction())->getLandingPadInst();
-
-    // Search for gc relocates that are attached to this landingpad.
-    for (const User *LandingPadUser : LandingPad->users()) {
-      if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
-        Result.push_back(Relocate);
+    const InvokeInst *Invoke = cast<InvokeInst>(getInstruction());
+    if (Invoke->getUnwindDest()->isLandingPad()) {
+      LandingPadInst *LandingPad =
+          cast<InvokeInst>(getInstruction())->getLandingPadInst();
+
+      // Search for gc relocates that are attached to this landingpad.
+      for (const User *LandingPadUser : LandingPad->users()) {
+        if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
+          Result.push_back(Relocate);
+      }
+#ifndef NDEBUG
+    } else {
+      for (auto *UnwindDest : Invoke->getTransitiveUnwindDests())
+        for (auto *U : UnwindDest->getFirstNonPHI()->users())
+          assert(!isa<GCRelocateInst>(U) &&
+                 "Relocates on funclet EH not supported");
+#endif // NDEBUG
     }
     return Result;
   }
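For callers, getRelocates() behaves as before on landingpad-based statepoints; funclet-style unwind destinations simply never contribute exceptional relocates (and assert in debug builds if any are present). A sketch of the typical query, for orientation only (`StatepointCS` is a placeholder for a call site already known to be a gc.statepoint):

    Statepoint SP(StatepointCS);
    for (const GCRelocateInst *Relocate : SP.getRelocates()) {
      // Each relocate identifies a base/derived pointer pair that the
      // collector may move across this statepoint.
      Value *Base = Relocate->getBasePtr();
      Value *Derived = Relocate->getDerivedPtr();
      (void)Base;
      (void)Derived;
    }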
Index: lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
===================================================================
--- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -72,6 +72,11 @@
                                                   cl::location(ClobberNonLive),
                                                   cl::Hidden);
 
+static cl::opt<bool> SpillOnExceptionPath("rs4gc-spill-on-exception-path",
+                                          cl::Hidden, cl::init(false));
+static cl::opt<bool> SpillOnNormalPath("rs4gc-spill-on-normal-path", cl::Hidden,
+                                       cl::init(false));
+
 static cl::opt<bool>
     AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                    cl::Hidden, cl::init(true));
@@ -169,17 +174,19 @@
 // Generally, after the execution of a full findBasePointer call, only the
 // base relation will remain.  Internally, we add a mixture of the two
 // types, then update all the second type to the first type
+typedef DenseMap<Value *, Value *> BaseMapTy;
 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
 typedef DenseSet<Value *> StatepointLiveSetTy;
-typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
-    RematerializedValueMapTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+    ReconstitutedValueMapTy;
+typedef DenseMap<Instruction *, BaseMapTy *> BaseMapMapTy;
 
 struct PartiallyConstructedSafepointRecord {
   /// The set of values known to be live across this safepoint
   StatepointLiveSetTy LiveSet;
 
   /// Mapping from live pointers to a base-defining-value
-  DenseMap<Value *, Value *> PointerToBase;
+  BaseMapTy PointerToBase;
 
   /// The *new* gc.statepoint instruction itself.  This produces the token
   /// that normal path gc.relocates and the gc.result are tied to.
@@ -191,8 +198,17 @@
 
   /// Record live values we are rematerialized instead of relocating.
   /// They are not included into 'LiveSet' field.
-  /// Maps rematerialized copy to it's original value.
-  RematerializedValueMapTy RematerializedValues;
+  /// Maps rematerialized copy to its original value.
+  ReconstitutedValueMapTy RematerializedValues;
+
+  /// Record fills where we spilled a live value instead of relocating.
+  /// They are not included into 'LiveSet' field.
+  /// Maps loaded copy to its original value.
+  ReconstitutedValueMapTy ReloadedValues;
+
+  /// Record spill slots holding live values spilled across this statepoint.
+  /// These need to be reported to the GC for relocation.
+  SmallVector<Value *, 4> SpillSlots;
 };
 }
 
@@ -211,12 +227,15 @@
 /// Compute the live-in set for every basic block in the function
 static void computeLiveInValues(DominatorTree &DT, Function &F,
-                                GCPtrLivenessData &Data);
+                                GCPtrLivenessData &Data,
+                                const BaseMapMapTy *BaseMaps = nullptr);
 
 /// Given results from the dataflow liveness computation, find the set of live
 /// Values at a particular instruction.
-static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
-                              StatepointLiveSetTy &out);
+static void findLiveSetAtStatepoint(CallSite Statepoint,
+                                    GCPtrLivenessData &Data,
+                                    const BaseMapMapTy *BaseMaps,
+                                    StatepointLiveSetTy &out);
 
 // TODO: Once we can get to the GCStrategy, this becomes
 // Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -291,16 +310,17 @@
 }
 
 // Conservatively identifies any definitions which might be live at the
-// given instruction. The analysis is performed immediately before the
-// given instruction. Values defined by that instruction are not considered
-// live.  Values used by that instruction are considered live.
+// given instruction. Deopt arguments are treated specially, and considered
+// live at the given parse point even though they appear in its argument
+// list, to ensure they are reported/relocated.
 static void analyzeParsePointLiveness(
     DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
     const CallSite &CS,
     PartiallyConstructedSafepointRecord &result) {
-  Instruction *inst = CS.getInstruction();
-
   StatepointLiveSetTy LiveSet;
-  findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
+  assert(result.PointerToBase.empty() &&
+         "Not expecting bases to be computed yet");
+  findLiveSetAtStatepoint(CS, OriginalLivenessData, nullptr, LiveSet);
 
   if (PrintLiveSet) {
     // Note: This output is used by several of the test cases
@@ -1191,8 +1211,9 @@
 /// Given an updated version of the dataflow liveness results, update the
 /// liveset and base pointer maps for the call site CS.
 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const BaseMapMapTy &BaseMaps,
                                   const CallSite &CS,
-                                  PartiallyConstructedSafepointRecord &result);
+                                  PartiallyConstructedSafepointRecord &Info);
 
 static void recomputeLiveInValues(
     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
@@ -1200,11 +1221,21 @@
   // TODO-PERF: reuse the original liveness, then simply run the dataflow
   // again.  The old values are still live and will help it stabilize quickly.
   GCPtrLivenessData RevisedLivenessData;
-  computeLiveInValues(DT, F, RevisedLivenessData);
+  // The liveness walk needs to recognize when it visits a statepoint, and
+  // add to the live gens the base pointers of any derived pointers which
+  // are live across said statepoint.  Pass it a map it can use to detect
+  // and compute this, whose keys are the statepoint instructions and
+  // whose values point to their PointerToBase maps.
+  BaseMapMapTy BaseMaps;
+  for (size_t I = 0; I < records.size(); I++) {
+    Instruction *Statepoint = toUpdate[I].getInstruction();
+    BaseMaps[Statepoint] = &records[I].PointerToBase;
+  }
+  computeLiveInValues(DT, F, RevisedLivenessData, &BaseMaps);
   for (size_t i = 0; i < records.size(); i++) {
     struct PartiallyConstructedSafepointRecord &info = records[i];
     const CallSite &CS = toUpdate[i];
-    recomputeLiveInValues(RevisedLivenessData, CS, info);
+    recomputeLiveInValues(RevisedLivenessData, BaseMaps, CS, info);
   }
 }
@@ -1370,6 +1401,134 @@
   };
 }
 
+// Generate spill slots for values that get spilled rather than
+// relocated/rematerialized.
+typedef DenseMap<Value *, AllocaInst *> SpillMapTy;
+typedef DenseMap<BasicBlock *, DenseSet<Value *>> FillMapTy;
+static void StabilizeOrder(SmallVectorImpl<Value *> &LiveVec);
+static void generateSpills(SpillMapTy &SpillMap, FillMapTy &FillMap,
+                           CallSite CS,
+                           PartiallyConstructedSafepointRecord &Record) {
+  assert(SpillOnNormalPath || SpillOnExceptionPath);
+
+  // Calls have no exception path.
+  if (CS.isCall() && !SpillOnNormalPath)
+    return;
+
+  Function *F = CS.getInstruction()->getParent()->getParent();
+  Instruction *AllocaInsertBefore = &F->getEntryBlock().front();
+
+  // Copy the live set to a vector for "determinism" (see FIXME at
+  // StabilizeOrder).
+  // Convert to vector for efficient cross referencing.
+  SmallVector<Value *, 64> LiveVec;
+  LiveVec.reserve(Record.LiveSet.size());
+  LiveVec.append(Record.LiveSet.begin(), Record.LiveSet.end());
+  StabilizeOrder(LiveVec);
+
+  // First, generate spills.
+  // TODO: Be smarter about where spills are inserted to avoid redundant
+  // ones (which requires detecting liverange interferences for the PHI case,
+  // and ideally would consult profile weights to minimize store frequency).
+  Instruction *SpillInsertBefore = CS.getInstruction();
+  auto generateSpill = [&](Value *Var, Value *Val) {
+    AllocaInst *&SpillSlot = SpillMap[Var];
+    // When generating new spill slots, walk AllocaInsertBefore back
+    // to avoid mixing with stores if the first instruction in the
+    // function happens to be a statepoint.
+    if (!SpillSlot)
+      AllocaInsertBefore = SpillSlot =
+          new AllocaInst(Var->getType(), suffixed_name_or(Var, ".gc_spill", ""),
+                         AllocaInsertBefore);
+    new StoreInst(Val, SpillSlot, SpillInsertBefore);
+    Record.SpillSlots.push_back(SpillSlot);
+  };
+
+  // TODO: Make the liveness information in the record more verbose so for
+  // invokes we can:
+  // 1) If spilling only across exception edges, spill only the values live
+  //    into the EH pad(s).
+  // 2) If a value is live only until a phi source, spill that value just
+  //    for the phi, instead of once for the phi and once for itself.
+
+  // Spill all live-across values.
+  for (Value *Var : LiveVec)
+    generateSpill(Var, Var);
+
+  // Spill any necessary incoming PHI values
+  MapVector<Value *, Value *> IncomingPHIValueMap;
+  if (CS.isInvoke() && SpillOnExceptionPath) {
+    auto *Invoke = cast<InvokeInst>(CS.getInstruction());
+    BasicBlock *Pred = Invoke->getParent();
+    for (BasicBlock *Pad : Invoke->getTransitiveUnwindDests<false>()) {
+      for (auto I = Pad->begin(); auto *PHI = dyn_cast<PHINode>(&*I); ++I) {
+        // Find the value to store for this PHI
+        Value *IncomingValue = PHI->getIncomingValueForBlock(Pred);
+        // If the incoming value was itself a PHI we've walked over,
+        // recurse to that incoming value
+        auto MapIter = IncomingPHIValueMap.find(IncomingValue);
+        if (MapIter != IncomingPHIValueMap.end())
+          IncomingValue = MapIter->second;
+        // Record the incoming value in case we see a use of it in
+        // a subsequent PHI.
+        IncomingPHIValueMap[PHI] = IncomingValue;
+        // Generate the spill
+        generateSpill(PHI, IncomingValue);
+      }
+      // Update pred if we're going to visit this block's successors,
+      // which we'll do iff it's unsplittable.
+      if (isa<CatchSwitchInst>(Pad->getFirstNonPHI()))
+        Pred = Pad;
+    }
+  }
+
+  // Next, generate fills.
+  auto generateFill = [&](Value *Var, Instruction *LoadInsertBefore) {
+    auto *Fill =
+        new LoadInst(SpillMap[Var], suffixed_name_or(Var, ".gc_reload", ""),
+                     LoadInsertBefore);
+    Record.ReloadedValues[Fill] = Var;
+  };
+
+  // Generate normal-path fills if necessary
+  if (SpillOnNormalPath) {
+    Instruction *LoadInsertBefore;
+    if (CS.isCall())
+      LoadInsertBefore = CS.getInstruction()->getNextNode();
+    else
+      LoadInsertBefore =
+          &cast<InvokeInst>(CS.getInstruction())->getNormalDest()->front();
+    assert(!isa<PHINode>(LoadInsertBefore) &&
+           "Expected normal critical edges to be split");
+    for (Value *Var : LiveVec)
+      generateFill(Var, LoadInsertBefore);
+  }
+
+  // Generate exception-path fills if necessary
+  if (CS.isInvoke() && SpillOnExceptionPath) {
+    auto ensureFill = [&](Value *Var, BasicBlock *Pad) {
+      // Don't redundantly reload the same var on behalf
+      // of multiple invoke predecessors.
+      auto &PadFills = FillMap[Pad];
+      if (!PadFills.insert(Var).second)
+        return;
+      generateFill(Var, &*Pad->getFirstInsertionPt());
+    };
+    auto *Invoke = cast<InvokeInst>(CS.getInstruction());
+    for (BasicBlock *Pad : Invoke->getTransitiveUnwindDests()) {
+      for (Value *Var : LiveVec)
+        ensureFill(Var, Pad);
+      for (const auto &IncomingValuePair : IncomingPHIValueMap)
+        ensureFill(IncomingValuePair.first, Pad);
+    }
+  }
+
+  // If we've spilled on all paths, we don't need to generate any relocates,
+  // so clear the live set for this statepoint.
+  if (SpillOnNormalPath && (CS.isCall() || SpillOnExceptionPath))
+    Record.LiveSet.clear();
+}
+
 static void
 makeStatepointExplicitImpl(const CallSite CS, /* to replace */
                            const SmallVectorImpl<Value *> &BasePtrs,
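To make the intended codegen concrete: for each value it decides to spill, generateSpills creates one entry-block alloca (reused across statepoints via SpillMap), stores the value immediately before the statepoint, and reloads it at each resume point, leaving later phases to tidy the slots. A hand-written equivalent for a single value, as a sketch only using the era's constructors (`F`, `LiveVal`, `StatepointInstr`, and `Dest` are placeholders):

    // One stack slot per value, allocated in the entry block...
    AllocaInst *Slot = new AllocaInst(LiveVal->getType(), "slot",
                                      &F->getEntryBlock().front());
    // ...a spill store on the way into the statepoint...
    new StoreInst(LiveVal, Slot, /*InsertBefore=*/StatepointInstr);
    // ...and a reload wherever the value is needed afterwards.
    LoadInst *Reload =
        new LoadInst(Slot, "reload", &*Dest->getFirstInsertionPt());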
@@ -1385,7 +1544,19 @@
   Instruction *InsertBefore = CS.getInstruction();
   IRBuilder<> Builder(InsertBefore);
 
-  ArrayRef<Value *> GCArgs(LiveVariables);
+  // The gc args are the concatenation of the live variables and the spill
+  // slots.
+  ArrayRef<Value *> GCArgs;
+  SmallVector<Value *, 64> ConcatenatedArgs;
+  if (Result.SpillSlots.empty()) {
+    GCArgs = ArrayRef<Value *>(LiveVariables);
+  } else if (LiveVariables.empty()) {
+    GCArgs = ArrayRef<Value *>(Result.SpillSlots);
+  } else {
+    ConcatenatedArgs.append(LiveVariables.begin(), LiveVariables.end());
+    ConcatenatedArgs.append(Result.SpillSlots.begin(), Result.SpillSlots.end());
+    GCArgs = ArrayRef<Value *>(ConcatenatedArgs);
+  }
   uint64_t StatepointID = 0xABCDEF00;
   uint32_t NumPatchBytes = 0;
   uint32_t Flags = uint32_t(StatepointFlags::None);
@@ -1462,21 +1633,28 @@
 
     Token = Invoke;
 
     // Generate gc relocates in exceptional path
-    BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
-    assert(!isa<PHINode>(UnwindBlock->begin()) &&
-           UnwindBlock->getUniquePredecessor() &&
-           "can't safely insert in this block!");
-
-    Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
-    Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
-
-    // Attach exceptional gc relocates to the landingpad.
-    Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
-    Result.UnwindToken = ExceptionalToken;
-
-    const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
-    CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
-                      Builder);
+    if (SpillOnExceptionPath) {
+      Result.UnwindToken = nullptr;
+    } else {
+      for (BasicBlock *UnwindBlock : ToReplace->getTransitiveUnwindDests()) {
+        assert(!isa<PHINode>(UnwindBlock->begin()) &&
+               UnwindBlock->getUniquePredecessor() &&
+               "can't safely insert in this block!");
+
+        Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
+        Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+        // Attach exceptional gc relocates to the landingpad.
+        Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
+        assert(Result.UnwindToken == nullptr &&
+               "Cannot report multiple unwind tokens");
+        Result.UnwindToken = ExceptionalToken;
+
+        const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+        CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs,
+                          ExceptionalToken, Builder);
+      }
+    }
 
     // Generate gc relocates and returns for normal block
     BasicBlock *NormalDest = ToReplace->getNormalDest();
@@ -1516,6 +1694,13 @@
   CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
 }
 
+// FIXME: This gives nondeterministic output when values are nameless
+static void StabilizeOrder(SmallVectorImpl<Value *> &LiveVec) {
+  std::sort(LiveVec.begin(), LiveVec.end(),
+            [](const Value *L, const Value *R) {
+              return L->getName() < R->getName();
+            });
+}
+
+// FIXME: This gives nondeterministic output when values are nameless
 static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
                            SmallVectorImpl<Value *> &LiveVec) {
   assert(BaseVec.size() == LiveVec.size());
@@ -1616,23 +1801,23 @@
 }
 
 // Helper function for the "relocationViaAlloca". Similar to the
-// "insertRelocationStores" but works for rematerialized values.
+// "insertRelocationStores" but works for reconstituted values
+// (i.e. rematerialized values and reloads of spilled values).
 static void
-insertRematerializationStores(
-    RematerializedValueMapTy RematerializedValues,
-    DenseMap<Value *, Value *> &AllocaMap,
-    DenseSet<Value *> &VisitedLiveValues) {
+insertReconstitutionStores(const ReconstitutedValueMapTy &ReconstitutedValues,
+                           DenseMap<Value *, Value *> &AllocaMap,
+                           DenseSet<Value *> &VisitedLiveValues) {
 
-  for (auto RematerializedValuePair: RematerializedValues) {
-    Instruction *RematerializedValue = RematerializedValuePair.first;
-    Value *OriginalValue = RematerializedValuePair.second;
+  for (auto ReconstitutedValuePair : ReconstitutedValues) {
+    Instruction *ReconstitutedValue = ReconstitutedValuePair.first;
+    Value *OriginalValue = ReconstitutedValuePair.second;
 
     assert(AllocaMap.count(OriginalValue) &&
-           "Can not find alloca for rematerialized value");
+           "Can not find alloca for reconstituted value");
     Value *Alloca = AllocaMap[OriginalValue];
 
-    StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
-    Store->insertAfter(RematerializedValue);
+    StoreInst *Store = new StoreInst(ReconstitutedValue, Alloca);
+    Store->insertAfter(ReconstitutedValue);
 
 #ifndef NDEBUG
     VisitedLiveValues.insert(OriginalValue);
@@ -1659,6 +1844,7 @@
   SmallVector<AllocaInst *, 200> PromotableAllocas;
   // Used later to chack that we have enough allocas to store all values
   std::size_t NumRematerializedValues = 0;
+  std::size_t NumSpilledValues = 0;
   PromotableAllocas.reserve(Live.size());
 
   // Emit alloca for "LiveValue" and record it in "allocaMap" and
@@ -1674,8 +1860,8 @@
   for (Value *V : Live)
     emitAllocaFor(V);
 
-  // Emit allocas for rematerialized values
-  for (const auto &Info : Records)
+  // Emit allocas for reconstituted values
+  for (const auto &Info : Records) {
     for (auto RematerializedValuePair : Info.RematerializedValues) {
       Value *OriginalValue = RematerializedValuePair.second;
       if (AllocaMap.count(OriginalValue) != 0)
         continue;
 
       emitAllocaFor(OriginalValue);
       ++NumRematerializedValues;
     }
@@ -1685,6 +1871,16 @@
 
+    for (auto SpilledValuePair : Info.ReloadedValues) {
+      Value *OriginalValue = SpilledValuePair.second;
+      if (AllocaMap.count(OriginalValue) != 0)
+        continue;
+
+      emitAllocaFor(OriginalValue);
+      ++NumSpilledValues;
+    }
+  }
+
   // The next two loops are part of the same conceptual operation.  We need to
   // insert a store to the alloca after the original def and at each
   // redefinition.  We need to insert a load before each use.  These are split
@@ -1705,14 +1901,18 @@
 
     // In case if it was invoke statepoint
     // we will insert stores for exceptional path gc relocates.
-    if (isa<InvokeInst>(Statepoint)) {
+    if (isa<InvokeInst>(Statepoint) && Info.UnwindToken) {
       insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                              VisitedLiveValues);
     }
 
     // Do similar thing with rematerialized values
-    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
-                                  VisitedLiveValues);
+    insertReconstitutionStores(Info.RematerializedValues, AllocaMap,
+                               VisitedLiveValues);
+
+    // And with reloads
+    insertReconstitutionStores(Info.ReloadedValues, AllocaMap,
+                               VisitedLiveValues);
 
     if (ClobberNonLive) {
       // As a debugging aid, pretend that an unrelocated pointer becomes null at
@@ -1745,7 +1945,8 @@
       // gc.results and gc.relocates, but that's fine.
       if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
         InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
-        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
+        for (BasicBlock *UnwindDest : II->getTransitiveUnwindDests())
+          InsertClobbersAt(&*UnwindDest->getFirstInsertionPt());
       } else {
         InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
       }
@@ -1816,7 +2017,8 @@
     }
   }
 
-  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
+  assert(PromotableAllocas.size() ==
+             Live.size() + NumRematerializedValues + NumSpilledValues &&
          "we must have the same allocas with lives");
   if (!PromotableAllocas.empty()) {
     // Apply mem2reg to promote alloca to SSA
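The invariant behind the assert just above is worth stating: by this point every live, rematerialized, or reloaded value owns exactly one alloca, every (re)definition stores into it, and every use reads from it, so mem2reg can fold the relocations back into SSA. Schematically, for one def/use pair (a sketch with placeholder names `DefInst`, `UseInst`, `Slot`; the real code also special-cases invokes and PHIs):

    // Store to the slot right after the (re)definition...
    StoreInst *S = new StoreInst(DefInst, Slot);
    S->insertAfter(DefInst);
    // ...and read it back immediately before the use.
    LoadInst *L = new LoadInst(Slot, "reload", /*InsertBefore=*/UseInst);
    UseInst->replaceUsesOfWith(DefInst, L);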
@@ -1841,33 +2043,6 @@
           }),
       Vec.end());
 }
 
-/// Insert holders so that each Value is obviously live through the entire
-/// lifetime of the call.
-static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
-                                 SmallVectorImpl<CallInst *> &Holders) {
-  if (Values.empty())
-    // No values to hold live, might as well not insert the empty holder
-    return;
-
-  Module *M = CS.getInstruction()->getModule();
-  // Use a dummy vararg function to actually hold the values live
-  Function *Func = cast<Function>(M->getOrInsertFunction(
-      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
-  if (CS.isCall()) {
-    // For call safepoints insert dummy calls right after safepoint
-    Holders.push_back(CallInst::Create(Func, Values, "",
-                                       &*++CS.getInstruction()->getIterator()));
-    return;
-  }
-  // For invoke safepooints insert dummy calls both in normal and
-  // exceptional destination blocks
-  auto *II = cast<InvokeInst>(CS.getInstruction());
-  Holders.push_back(CallInst::Create(
-      Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
-  Holders.push_back(CallInst::Create(
-      Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
-}
-
 static void findLiveReferences(
     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
@@ -2185,16 +2360,16 @@
 
       Instruction *NormalInsertBefore =
           &*Invoke->getNormalDest()->getFirstInsertionPt();
-      Instruction *UnwindInsertBefore =
-          &*Invoke->getUnwindDest()->getFirstInsertionPt();
-
       Instruction *NormalRematerializedValue =
          rematerializeChain(NormalInsertBefore);
-      Instruction *UnwindRematerializedValue =
-          rematerializeChain(UnwindInsertBefore);
-
       Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
-      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+
+      for (BasicBlock *UnwindDest : Invoke->getTransitiveUnwindDests()) {
+        Instruction *UnwindInsertBefore = &*UnwindDest->getFirstInsertionPt();
+        Instruction *UnwindRematerializedValue =
+            rematerializeChain(UnwindInsertBefore);
+        Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+      }
     }
   }
@@ -2219,35 +2394,15 @@
 
   // When inserting gc.relocates for invokes, we need to be able to insert at
   // the top of the successor blocks.  See the comment on
-  // normalForInvokeSafepoint on exactly what is needed.  Note that this step
+  // normalizeForInvokeSafepoint on exactly what is needed.  Note that this step
   // may restructure the CFG.
   for (CallSite CS : ToUpdate) {
     if (!CS.isInvoke())
       continue;
     auto *II = cast<InvokeInst>(CS.getInstruction());
     normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
-    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
-  }
-
-  // A list of dummy calls added to the IR to keep various values obviously
-  // live in the IR.  We'll remove all of these when done.
-  SmallVector<CallInst *, 64> Holders;
-
-  // Insert a dummy call with all of the arguments to the vm_state we'll need
-  // for the actual safepoint insertion.  This ensures reference arguments in
-  // the deopt argument list are considered live through the safepoint (and
-  // thus makes sure they get relocated.)
-  for (CallSite CS : ToUpdate) {
-    SmallVector<Value *, 64> DeoptValues;
-
-    for (Value *Arg : GetDeoptBundleOperands(CS)) {
-      assert(!isUnhandledGCPointerType(Arg->getType()) &&
-             "support for FCA unimplemented");
-      if (isHandledGCPointerType(Arg->getType()))
-        DeoptValues.push_back(Arg);
-    }
-
-    insertUseHolderAfter(CS, DeoptValues, Holders);
+    if (!SpillOnExceptionPath)
+      normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
   }
 
   SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
@@ -2269,7 +2424,8 @@
     }
   } // end of cache scope
 
-  // The base phi insertion logic (for any safepoint) may have inserted new
+  // By selecting base pointers, we've effectively inserted new uses, because
+  // the base phi insertion logic (for any safepoint) may have inserted new
   // instructions which are now live at some safepoint.  The simplest such
   // example is:
   // loop:
@@ -2278,24 +2434,8 @@
   //   gep a + 1
   //   safepoint 2
   //   br loop
-  // We insert some dummy calls after each safepoint to definitely hold live
-  // the base pointers which were identified for that safepoint.  We'll then
-  // ask liveness for _every_ base inserted to see what is now live.  Then we
-  // remove the dummy calls.
-  Holders.reserve(Holders.size() + Records.size());
-  for (size_t i = 0; i < Records.size(); i++) {
-    PartiallyConstructedSafepointRecord &Info = Records[i];
-
-    SmallVector<Value *, 128> Bases;
-    for (auto Pair : Info.PointerToBase)
-      Bases.push_back(Pair.second);
-
-    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
-  }
-
-  // By selecting base pointers, we've effectively inserted new uses. Thus, we
-  // need to rerun liveness.  We may *also* have inserted new defs, but that's
-  // not the key issue.
+  // Thus, we need to rerun liveness.  We may *also* have inserted new defs,
+  // but that's not the key issue.
   recomputeLiveInValues(F, DT, ToUpdate, Records);
 
   if (PrintBasePointers) {
@@ -2324,11 +2464,6 @@
       if (isa<Constant>(BasePair.second))
         Info.LiveSet.erase(BasePair.first);
 
-  for (CallInst *CI : Holders)
-    CI->eraseFromParent();
-
-  Holders.clear();
-
   // Do a limited scalarization of any live at safepoint vector values which
   // contain pointers.  This enables this pass to run after vectorization at
   // the cost of some possible performance loss.  Note: This is known to not
@@ -2351,6 +2486,14 @@
   for (size_t i = 0; i < Records.size(); i++)
     rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
 
+  // Generate spills/fills dictated by the strategy.
+  if (SpillOnNormalPath || SpillOnExceptionPath) {
+    SpillMapTy SpillMap;
+    FillMapTy FillMap;
+    for (size_t I = 0; I < Records.size(); ++I)
+      generateSpills(SpillMap, FillMap, ToUpdate[I], Records[I]);
+  }
+
   // We need this to safely RAUW and delete call or invoke return values that
   // may themselves be live over a statepoint.  For details, please see usage in
   // makeStatepointExplicitImpl.
@@ -2394,10 +2537,18 @@
     // the live values might be the result of a call which needs a safepoint.
     // That Value* no longer exists and we need to use the new gc_result.
     // Thankfully, the live set is embedded in the statepoint (and updated), so
-    // we just grab that.
+    // we just grab that.  The only bit of trickery is that the "gc args" of
+    // the statepoint are the concatenation of the relocated values (which need
+    // to be inserted in the "live" set) and the spill slots for spilled values
+    // (which should *not* be inserted in the "live" set), so we have to find
+    // the split point.
     Statepoint Statepoint(Info.StatepointToken);
-    Live.insert(Live.end(), Statepoint.gc_args_begin(),
-                Statepoint.gc_args_end());
+    auto LiveArgsBegin = Statepoint.gc_args_begin();
+    auto GcArgsEnd = Statepoint.gc_args_end();
+    auto LiveArgsEnd = LiveArgsBegin;
+    while (LiveArgsEnd != GcArgsEnd && !isa<AllocaInst>(*LiveArgsEnd))
+      ++LiveArgsEnd;
+    Live.insert(Live.end(), LiveArgsBegin, LiveArgsEnd);
 #ifndef NDEBUG
     // Do some basic sanity checks on our liveness results before performing
     // relocation.  Relocation can and will turn mistakes in liveness results
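Why scanning for the first alloca is sound: relocated live values are gc pointers and never allocas (allocas are unmanaged stack slots), while every spill slot is an alloca, and the concatenation order is fixed by makeStatepointExplicitImpl above. So the gc args always look like [relocated values..., spill slots...], and the boundary can be found as in this sketch (equivalent to the while loop above; `SP` is a placeholder Statepoint and std::find_if requires <algorithm>):

    // Find the live/spill split point: the first alloca among the gc args.
    auto IsSlot = [](const Use &U) { return isa<AllocaInst>(U.get()); };
    auto LiveEnd = std::find_if(SP.gc_args_begin(), SP.gc_args_end(), IsSlot);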
@@ -2624,11 +2775,18 @@
 // TODO: Consider using bitvectors for liveness, the set of potentially
 // interesting values should be small and easy to pre-compute.
 
+/// Add the base pointers from the given base map to the given live set.
+static void insertBases(BaseMapTy &BaseMap, StatepointLiveSetTy &LiveSet) {
+  for (auto &Pair : BaseMap)
+    LiveSet.insert(Pair.second);
+}
+
 /// Compute the live-in set for the location rbegin starting from
 /// the live-out set of the basic block
 static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
                                 BasicBlock::reverse_iterator rend,
-                                DenseSet<Value *> &LiveTmp) {
+                                const BaseMapMapTy *BaseMaps,
+                                StatepointLiveSetTy &LiveTmp) {
 
   for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
     Instruction *I = &*ritr;
@@ -2659,6 +2817,14 @@
           LiveTmp.insert(V);
       }
     }
+
+    // If we've computed live references and the current instruction happens
+    // to be a statepoint, add its bases to the live set.
+    if (BaseMaps && CallSite(I)) {
+      auto BaseMapIter = BaseMaps->find(I);
+      if (BaseMapIter != BaseMaps->end())
+        insertBases(*BaseMapIter->second, LiveTmp);
+    }
   }
 }
@@ -2716,7 +2882,8 @@
 #endif
 
 static void computeLiveInValues(DominatorTree &DT, Function &F,
-                                GCPtrLivenessData &Data) {
+                                GCPtrLivenessData &Data,
+                                const BaseMapMapTy *BaseMaps) {
   SmallSetVector<BasicBlock *, 32> Worklist;
   auto AddPredsToWorklist = [&](BasicBlock *BB) {
@@ -2733,7 +2900,7 @@
   for (BasicBlock &BB : F) {
     Data.KillSet[&BB] = computeKillSet(&BB);
     Data.LiveSet[&BB].clear();
-    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+    computeLiveInValues(BB.rbegin(), BB.rend(), BaseMaps, Data.LiveSet[&BB]);
 
 #ifndef NDEBUG
     for (Value *Kill : Data.KillSet[&BB])
@@ -2793,31 +2960,50 @@
 #endif
 }
 
-static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
-                              StatepointLiveSetTy &Out) {
-
+static void findLiveSetAtStatepoint(CallSite Statepoint,
+                                    GCPtrLivenessData &Data,
+                                    const BaseMapMapTy *BaseMaps,
+                                    StatepointLiveSetTy &Out) {
+  Instruction *Inst = Statepoint.getInstruction();
   BasicBlock *BB = Inst->getParent();
 
   // Note: The copy is intentional and required
   assert(Data.LiveOut.count(BB));
   DenseSet<Value *> LiveOut = Data.LiveOut[BB];
 
-  // We want to handle the statepoint itself oddly.  It's
-  // call result is not live (normal), nor are it's arguments
-  // (unless they're used again later).  This adjustment is
-  // specifically what we need to relocate
+  // We want to handle the statepoint itself oddly.  Its
+  // call result is not live (normal), nor are its non-deopt
+  // arguments (unless they're used again later).  This
+  // adjustment is specifically what we need to relocate.
   BasicBlock::reverse_iterator rend(Inst->getIterator());
-  computeLiveInValues(BB->rbegin(), rend, LiveOut);
+  computeLiveInValues(BB->rbegin(), rend, BaseMaps, LiveOut);
   LiveOut.erase(Inst);
   Out.insert(LiveOut.begin(), LiveOut.end());
+
+  // Ensure reference arguments in the deopt argument list are considered live
+  // through the safepoint (and thus make sure they get relocated.)
+  for (Value *Arg : GetDeoptBundleOperands(Statepoint)) {
+    assert(!isUnhandledGCPointerType(Arg->getType()) &&
+           "support for FCA unimplemented");
+    if (isHandledGCPointerType(Arg->getType()))
+      Out.insert(Arg);
+  }
+
+  // Likewise ensure that bases of derived pointers live through
+  // the statepoint are considered live through it as well.
+  if (BaseMaps)
+    insertBases(*BaseMaps->lookup(Inst), Out);
 }
 
 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const BaseMapMapTy &BaseMaps,
                                   const CallSite &CS,
                                   PartiallyConstructedSafepointRecord &Info) {
-  Instruction *Inst = CS.getInstruction();
+  // TODO: update this to collect separate live sets for normal and exception
+  // paths; when spilling on exception path but not normal path, this lets us
+  // skip spilling values that are live only on the normal path.
   StatepointLiveSetTy Updated;
-  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+  findLiveSetAtStatepoint(CS, RevisedLivenessData, &BaseMaps, Updated);
 
 #ifndef NDEBUG
   DenseSet<Value *> Bases;
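A tiny worked example of the base-liveness rule encoded here: if a derived pointer d = gep(b, 8) is live across a second statepoint, its base b must be treated as live there too, even when no later instruction mentions b, because relocating d requires knowing where b moved. That is what threading BaseMaps through the liveness walk accomplishes; the per-statepoint step reduces to this sketch (mirroring insertBases above; `StatepointInstr` is a placeholder):

    // When the backward walk reaches a recorded statepoint, its bases
    // become live, exactly as findLiveSetAtStatepoint requires.
    if (const BaseMapTy *BaseMap = BaseMaps->lookup(StatepointInstr))
      for (const auto &Pair : *BaseMap)
        LiveSet.insert(Pair.second); // Pair.second is the base pointer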
Index: unittests/IR/InstructionsTest.cpp
===================================================================
--- unittests/IR/InstructionsTest.cpp
+++ unittests/IR/InstructionsTest.cpp
@@ -168,6 +168,86 @@
   delete bb1;
 }
 
+template <typename ItRange>
+static void checkBlockIteration(ArrayRef<BasicBlock *> Expected,
+                                ItRange Range) {
+  auto Begin = Range.begin(), End = Range.end();
+  size_t I = 0;
+  for (auto Iter = Begin; Iter != End; ++Iter)
+    EXPECT_EQ(Expected[I++], *Iter);
+  EXPECT_EQ(Expected.size(), I);
+}
+
+TEST(InstructionsTest, UnwindDestIterator) {
+  LLVMContext &C(getGlobalContext());
+
+  // Make BasicBlocks to model exceptional flow.
+  std::unique_ptr<BasicBlock> SwitchBlock1(BasicBlock::Create(C));
+  std::unique_ptr<BasicBlock> SwitchBlock2(BasicBlock::Create(C));
+  std::unique_ptr<BasicBlock> CatchBlock1(BasicBlock::Create(C));
+  std::unique_ptr<BasicBlock> CatchBlock2(BasicBlock::Create(C));
+  std::unique_ptr<BasicBlock> CatchBlock3(BasicBlock::Create(C));
+  std::unique_ptr<BasicBlock> CleanupBlock(BasicBlock::Create(C));
+
+  // Generate EH pads in the BasicBlocks.
+  auto *NoToken = ConstantTokenNone::get(C);
+  auto *Switch1 = CatchSwitchInst::Create(NoToken, SwitchBlock2.get(), 1,
+                                          "switch1", SwitchBlock1.get());
+  auto *Switch2 = CatchSwitchInst::Create(NoToken, CleanupBlock.get(), 2,
+                                          "switch2", SwitchBlock2.get());
+  CleanupPadInst::Create(NoToken, {}, "cleanup", CleanupBlock.get());
+  CatchPadInst::Create(Switch1, {}, "catch1", CatchBlock1.get());
+  CatchPadInst::Create(Switch2, {}, "catch2", CatchBlock2.get());
+  CatchPadInst::Create(Switch2, {}, "catch3", CatchBlock3.get());
+
+  // Hook up the handlers to the switches.
+  Switch1->addHandler(CatchBlock1.get());
+  Switch2->addHandler(CatchBlock2.get());
+  Switch2->addHandler(CatchBlock3.get());
+
+  // Note the expected visit order with and without visiting unsplittable
+  // blocks.
+  BasicBlock *SplittableDests[] = {CatchBlock1.get(), CatchBlock2.get(),
+                                   CatchBlock3.get(), CleanupBlock.get()};
+  BasicBlock *AllDests[] = {SwitchBlock1.get(), CatchBlock1.get(),
+                            SwitchBlock2.get(), CatchBlock2.get(),
+                            CatchBlock3.get(),  CleanupBlock.get()};
+
+  // Give all the blocks terminators to make them well-formed.
+  for (BasicBlock *Block : SplittableDests)
+    new UnreachableInst(C, Block);
+
+  // Create and test an invoke.
+  auto *FnTy = FunctionType::get(Type::getVoidTy(C), false);
+  auto *FnPtr = ConstantPointerNull::get(FnTy->getPointerTo());
+  std::unique_ptr<BasicBlock> NormalDest(BasicBlock::Create(C));
+  std::unique_ptr<InvokeInst> Invoke(InvokeInst::Create(
+      FnTy, FnPtr, NormalDest.get(), SwitchBlock1.get(), {}));
+  checkBlockIteration(SplittableDests,
+                      Invoke->getTransitiveUnwindDests<true>());
+  checkBlockIteration(AllDests, Invoke->getTransitiveUnwindDests<false>());
+
+  // Create and test a cleanupret.
+  std::unique_ptr<CleanupPadInst> DummyPad(CleanupPadInst::Create(NoToken, {}));
+  std::unique_ptr<CleanupReturnInst> CleanupRet(
+      CleanupReturnInst::Create(DummyPad.get(), SwitchBlock1.get()));
+  checkBlockIteration(SplittableDests,
+                      CleanupRet->getTransitiveUnwindDests<true>());
+  checkBlockIteration(AllDests, CleanupRet->getTransitiveUnwindDests<false>());
+
+  // Create and test a catchswitch.
+  std::unique_ptr<CatchSwitchInst> TestSwitch(
+      CatchSwitchInst::Create(NoToken, SwitchBlock1.get(), 1));
+  std::unique_ptr<BasicBlock> TestCatchBlock(BasicBlock::Create(C));
+  CatchPadInst::Create(TestSwitch.get(), {}, "testcatch", TestCatchBlock.get());
+  TestSwitch->addHandler(TestCatchBlock.get());
+  checkBlockIteration(SplittableDests,
+                      TestSwitch->getTransitiveUnwindDests<true>());
+  checkBlockIteration(AllDests, TestSwitch->getTransitiveUnwindDests<false>());
+
+  // Break reference cycles before deleting IR.
+  for (auto *Block : AllDests)
+    Block->dropAllReferences();
+  TestSwitch->dropAllReferences();
+}
+
 TEST(InstructionsTest, CastInst) {
   LLVMContext &C(getGlobalContext());