diff --git a/llvm/docs/Statepoints.rst b/llvm/docs/Statepoints.rst --- a/llvm/docs/Statepoints.rst +++ b/llvm/docs/Statepoints.rst @@ -556,10 +556,6 @@ might contain a safepoint poll with a ``gc.statepoint`` and associated full relocation sequence, including all required ``gc.relocates``. -Note that by default, this pass only runs for the "statepoint-example" or -"core-clr" gc strategies. You will need to add your custom strategy to this -list or use one of the predefined ones. - As an example, given this code: .. code-block:: llvm @@ -583,8 +579,11 @@ In the above examples, the addrspace(1) marker on the pointers is the mechanism that the ``statepoint-example`` GC strategy uses to distinguish references from -non references. The pass assumes that all addrspace(1) pointers are non-integral -pointer types. Address space 1 is not globally reserved for this purpose. +non-references. This is controlled via ``GCStrategy::isGCManagedPointer``. The +``statepoint-example`` and ``coreclr`` strategies (the only two default +strategies that support statepoints) both use addrspace(1) to determine which +pointers are references; however, custom strategies don't have to follow this +convention. 
This pass can be used an utility function by a language frontend that doesn't want to manually reason about liveness, base pointers, or relocation when diff --git a/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h b/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h --- a/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h +++ b/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h @@ -25,12 +25,13 @@ class Module; class TargetTransformInfo; class TargetLibraryInfo; +class GCStrategy; struct RewriteStatepointsForGC : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool runOnFunction(Function &F, DominatorTree &, TargetTransformInfo &, - const TargetLibraryInfo &); + const TargetLibraryInfo &, GCStrategy *); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GCStrategy.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -125,7 +126,10 @@ /// constant physical memory: llvm.invariant.start. static void stripNonValidData(Module &M); -static bool shouldRewriteStatepointsIn(Function &F); +// Find the GC strategy for a function, or null if it doesn't have one. +static std::unique_ptr findGCStrategy(Function &F); + +static bool shouldRewriteStatepointsIn(Function &F, GCStrategy *GC); PreservedAnalyses RewriteStatepointsForGC::run(Module &M, ModuleAnalysisManager &AM) { @@ -136,15 +140,17 @@ if (F.isDeclaration() || F.empty()) continue; + std::unique_ptr GC = findGCStrategy(F); + // Policy choice says not to rewrite - the most common reason is that we're // compiling code without a GCStrategy. 
- if (!shouldRewriteStatepointsIn(F)) + if (!shouldRewriteStatepointsIn(F, GC.get())) continue; auto &DT = FAM.getResult(F); auto &TTI = FAM.getResult(F); auto &TLI = FAM.getResult(F); - Changed |= runOnFunction(F, DT, TTI, TLI); + Changed |= runOnFunction(F, DT, TTI, TLI, GC.get()); } if (!Changed) return PreservedAnalyses::all(); @@ -180,9 +186,11 @@ if (F.isDeclaration() || F.empty()) continue; + std::unique_ptr GC = findGCStrategy(F); + // Policy choice says not to rewrite - the most common reason is that // we're compiling code without a GCStrategy. - if (!shouldRewriteStatepointsIn(F)) + if (!shouldRewriteStatepointsIn(F, GC.get())) continue; TargetTransformInfo &TTI = @@ -191,7 +199,7 @@ getAnalysis().getTLI(F); auto &DT = getAnalysis(F).getDomTree(); - Changed |= Impl.runOnFunction(F, DT, TTI, TLI); + Changed |= Impl.runOnFunction(F, DT, TTI, TLI, GC.get()); } if (!Changed) @@ -311,37 +319,36 @@ /// Compute the live-in set for every basic block in the function static void computeLiveInValues(DominatorTree &DT, Function &F, - GCPtrLivenessData &Data); + GCPtrLivenessData &Data, GCStrategy *GC); /// Given results from the dataflow liveness computation, find the set of live /// Values at a particular instruction. static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, - StatepointLiveSetTy &out); + StatepointLiveSetTy &out, GCStrategy *GC); -// TODO: Once we can get to the GCStrategy, this becomes -// std::optional isGCManagedPointer(const Type *Ty) const override { +static bool isGCPointerType(Type *T, GCStrategy *GC) { + assert(GC && "GC Strategy for isGCPointerType cannot be null"); -static bool isGCPointerType(Type *T) { - if (auto *PT = dyn_cast(T)) - // For the sake of this example GC, we arbitrarily pick addrspace(1) as our - // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. 
- return PT->getAddressSpace() == 1; - return false; + if (!isa(T)) + return false; + + if (auto IsManaged = GC->isGCManagedPointer(T)) + return *IsManaged; + return true; // conservative - same as StatepointLowering } // Return true if this type is one which a) is a gc pointer or contains a GC // pointer and b) is of a type this code expects to encounter as a live value. // (The insertion code will assert that a type which matches (a) and not (b) // is not encountered.) -static bool isHandledGCPointerType(Type *T) { +static bool isHandledGCPointerType(Type *T, GCStrategy *GC) { // We fully support gc pointers - if (isGCPointerType(T)) + if (isGCPointerType(T, GC)) return true; // We partially support vectors of gc pointers. The code will assert if it // can't handle something. if (auto VT = dyn_cast(T)) - if (isGCPointerType(VT->getElementType())) + if (isGCPointerType(VT->getElementType(), GC)) return true; return false; } @@ -349,23 +356,24 @@ #ifndef NDEBUG /// Returns true if this type contains a gc pointer whether we know how to /// handle that type or not. -static bool containsGCPtrType(Type *Ty) { - if (isGCPointerType(Ty)) +static bool containsGCPtrType(Type *Ty, GCStrategy *GC) { + if (isGCPointerType(Ty, GC)) return true; if (VectorType *VT = dyn_cast(Ty)) - return isGCPointerType(VT->getScalarType()); + return isGCPointerType(VT->getScalarType(), GC); if (ArrayType *AT = dyn_cast(Ty)) - return containsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType(), GC); if (StructType *ST = dyn_cast(Ty)) - return llvm::any_of(ST->elements(), containsGCPtrType); + return llvm::any_of(ST->elements(), + [GC](Type *Ty) { return containsGCPtrType(Ty, GC); }); return false; } // Returns true if this is a type which a) is a gc pointer or contains a GC // pointer and b) is of a type which the code doesn't expect (i.e. first class // aggregates). Used to trip assertions. 
-static bool isUnhandledGCPointerType(Type *Ty) { - return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); +static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) { + return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC); } #endif @@ -382,9 +390,9 @@ // live. Values used by that instruction are considered live. static void analyzeParsePointLiveness( DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &Result) { + PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) { StatepointLiveSetTy LiveSet; - findLiveSetAtInst(Call, OriginalLivenessData, LiveSet); + findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC); if (PrintLiveSet) { dbgs() << "Live Variables:\n"; @@ -1385,20 +1393,21 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &result, - PointerToBaseTy &PointerToBase); + PointerToBaseTy &PointerToBase, + GCStrategy *GC); static void recomputeLiveInValues( Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records, - PointerToBaseTy &PointerToBase) { + PointerToBaseTy &PointerToBase, GCStrategy *GC) { // TODO-PERF: reuse the original liveness, then simply run the dataflow // again. The old values are still live and will help it stabilize quickly. 
GCPtrLivenessData RevisedLivenessData; - computeLiveInValues(DT, F, RevisedLivenessData); + computeLiveInValues(DT, F, RevisedLivenessData, GC); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, - PointerToBase); + recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase, + GC); } } @@ -1522,7 +1531,7 @@ static void CreateGCRelocates(ArrayRef LiveVariables, ArrayRef BasePtrs, Instruction *StatepointToken, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, GCStrategy *GC) { if (LiveVariables.empty()) return; @@ -1542,8 +1551,8 @@ // towards a single unified pointer type anyways, we can just cast everything // to an i8* of the right address space. A bitcast is added later to convert // gc_relocate to the actual value's type. - auto getGCRelocateDecl = [&] (Type *Ty) { - assert(isHandledGCPointerType(Ty)); + auto getGCRelocateDecl = [&](Type *Ty) { + assert(isHandledGCPointerType(Ty, GC)); auto AS = Ty->getScalarType()->getPointerAddressSpace(); Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); if (auto *VT = dyn_cast(Ty)) @@ -1668,7 +1677,8 @@ const SmallVectorImpl &LiveVariables, PartiallyConstructedSafepointRecord &Result, std::vector &Replacements, - const PointerToBaseTy &PointerToBase) { + const PointerToBaseTy &PointerToBase, + GCStrategy *GC) { assert(BasePtrs.size() == LiveVariables.size()); // Then go ahead and use the builder do actually do the inserts. 
We insert @@ -1901,7 +1911,7 @@ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); Result.UnwindToken = ExceptionalToken; - CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder); + CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC); // Generate gc relocates and returns for normal block BasicBlock *NormalDest = II->getNormalDest(); @@ -1947,7 +1957,7 @@ Result.StatepointToken = Token; // Second, create a gc.relocate for every live variable - CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder); + CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC); } // Replace an existing gc.statepoint with a new one and a set of gc.relocates @@ -1959,7 +1969,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, std::vector &Replacements, - const PointerToBaseTy &PointerToBase) { + const PointerToBaseTy &PointerToBase, GCStrategy *GC) { const auto &LiveSet = Result.LiveSet; // Convert to vector for efficient cross referencing. @@ -1976,7 +1986,7 @@ // Do the actual rewriting and delete the old statepoint makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements, - PointerToBase); + PointerToBase, GC); } // Helper function for the relocationViaAlloca. 
@@ -2277,12 +2287,13 @@ static void findLiveReferences( Function &F, DominatorTree &DT, ArrayRef toUpdate, - MutableArrayRef records) { + MutableArrayRef records, + GCStrategy *GC) { GCPtrLivenessData OriginalLivenessData; - computeLiveInValues(DT, F, OriginalLivenessData); + computeLiveInValues(DT, F, OriginalLivenessData, GC); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info); + analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC); } } @@ -2683,7 +2694,7 @@ TargetTransformInfo &TTI, SmallVectorImpl &ToUpdate, DefiningValueMapTy &DVCache, - IsKnownBaseMapTy &KnownBases) { + IsKnownBaseMapTy &KnownBases, GCStrategy *GC) { #ifndef NDEBUG // Validate the input std::set Uniqued; @@ -2718,9 +2729,9 @@ SmallVector DeoptValues; for (Value *Arg : GetDeoptBundleOperands(Call)) { - assert(!isUnhandledGCPointerType(Arg->getType()) && + assert(!isUnhandledGCPointerType(Arg->getType(), GC) && "support for FCA unimplemented"); - if (isHandledGCPointerType(Arg->getType())) + if (isHandledGCPointerType(Arg->getType(), GC)) DeoptValues.push_back(Arg); } @@ -2731,7 +2742,7 @@ // A) Identify all gc pointers which are statically live at the given call // site. - findLiveReferences(F, DT, ToUpdate, Records); + findLiveReferences(F, DT, ToUpdate, Records, GC); /// Global mapping from live pointers to a base-defining-value. PointerToBaseTy PointerToBase; @@ -2782,7 +2793,7 @@ // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue. - recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase); + recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC); if (PrintBasePointers) { errs() << "Base Pairs: (w/Relocation)\n"; @@ -2842,7 +2853,7 @@ // the old statepoint calls as we go.) 
for (size_t i = 0; i < Records.size(); i++) makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements, - PointerToBase); + PointerToBase, GC); ToUpdate.clear(); // prevent accident use of invalid calls. @@ -2899,7 +2910,7 @@ #ifndef NDEBUG // Validation check for (auto *Ptr : Live) - assert(isHandledGCPointerType(Ptr->getType()) && + assert(isHandledGCPointerType(Ptr->getType(), GC) && "must be a gc pointer type"); #endif @@ -3026,23 +3037,39 @@ } } +/// Looks up the GC strategy for a given function, returning null if the +/// function doesn't have a GC tag. The caller owns the returned strategy. +static std::unique_ptr findGCStrategy(Function &F) { + if (!F.hasGC()) + return nullptr; + + return getGCStrategy(F.getGC()); +} + /// Returns true if this function should be rewritten by this pass. The main /// point of this function is as an extension point for custom logic. -static bool shouldRewriteStatepointsIn(Function &F) { - // TODO: This should check the GCStrategy - if (F.hasGC()) { - const auto &FunctionGCName = F.getGC(); - const StringRef StatepointExampleName("statepoint-example"); - const StringRef CoreCLRName("coreclr"); - return (StatepointExampleName == FunctionGCName) || - (CoreCLRName == FunctionGCName); - } else +static bool shouldRewriteStatepointsIn(Function &F, GCStrategy *Strategy) { + if (!F.hasGC()) return false; + + assert(Strategy && "GC strategy is required by function, but was not found"); + return Strategy->useStatepoints(); +} + +/// Find whether or not at least one function in a given module should be +/// rewritten, as per the rewrite policy. 
+static bool anyFunctionNeedsRewrite(Module &M) { + for (Function &F : M) { + std::unique_ptr GC = findGCStrategy(F); + if (shouldRewriteStatepointsIn(F, GC.get())) + return true; + } + return false; } static void stripNonValidData(Module &M) { #ifndef NDEBUG - assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); + assert(anyFunctionNeedsRewrite(M) && "precondition!"); #endif for (Function &F : M) @@ -3054,10 +3081,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + GCStrategy *GC) { assert(!F.isDeclaration() && !F.empty() && "need function body to rewrite statepoints in"); - assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); + assert(shouldRewriteStatepointsIn(F, GC) && "mismatch in rewrite decision"); auto NeedsRewrite = [&TLI](Instruction &I) { if (const auto *Call = dyn_cast(&I)) { @@ -3200,8 +3228,8 @@ MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases); if (!ParsePointNeeded.empty()) - MadeChange |= - insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases); + MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, + KnownBases, GC); return MadeChange; } @@ -3216,7 +3244,7 @@ /// the live-out set of the basic block static void computeLiveInValues(BasicBlock::reverse_iterator Begin, BasicBlock::reverse_iterator End, - SetVector &LiveTmp) { + SetVector &LiveTmp, GCStrategy *GC) { for (auto &I : make_range(Begin, End)) { // KILL/Def - Remove this definition from LiveIn LiveTmp.remove(&I); @@ -3228,9 +3256,9 @@ // USE - Add to the LiveIn set for this instruction for (Value *V : I.operands()) { - assert(!isUnhandledGCPointerType(V->getType()) && + assert(!isUnhandledGCPointerType(V->getType(), GC) && "support for FCA unimplemented"); - if (isHandledGCPointerType(V->getType()) && !isa(V)) { + if (isHandledGCPointerType(V->getType(), GC) && !isa(V)) { // 
The choice to exclude all things constant here is slightly subtle. // There are two independent reasons: // - We assume that things which are constant (from LLVM's definition) @@ -3247,7 +3275,8 @@ } } -static void computeLiveOutSeed(BasicBlock *BB, SetVector &LiveTmp) { +static void computeLiveOutSeed(BasicBlock *BB, SetVector &LiveTmp, + GCStrategy *GC) { for (BasicBlock *Succ : successors(BB)) { for (auto &I : *Succ) { PHINode *PN = dyn_cast(&I); @@ -3255,18 +3284,18 @@ break; Value *V = PN->getIncomingValueForBlock(BB); - assert(!isUnhandledGCPointerType(V->getType()) && + assert(!isUnhandledGCPointerType(V->getType(), GC) && "support for FCA unimplemented"); - if (isHandledGCPointerType(V->getType()) && !isa(V)) + if (isHandledGCPointerType(V->getType(), GC) && !isa(V)) LiveTmp.insert(V); } } } -static SetVector computeKillSet(BasicBlock *BB) { +static SetVector computeKillSet(BasicBlock *BB, GCStrategy *GC) { SetVector KillSet; for (Instruction &I : *BB) - if (isHandledGCPointerType(I.getType())) + if (isHandledGCPointerType(I.getType(), GC)) KillSet.insert(&I); return KillSet; } @@ -3301,14 +3330,14 @@ #endif static void computeLiveInValues(DominatorTree &DT, Function &F, - GCPtrLivenessData &Data) { + GCPtrLivenessData &Data, GCStrategy *GC) { SmallSetVector Worklist; // Seed the liveness for each individual block for (BasicBlock &BB : F) { - Data.KillSet[&BB] = computeKillSet(&BB); + Data.KillSet[&BB] = computeKillSet(&BB, GC); Data.LiveSet[&BB].clear(); - computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]); + computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC); #ifndef NDEBUG for (Value *Kill : Data.KillSet[&BB]) @@ -3316,7 +3345,7 @@ #endif Data.LiveOut[&BB] = SetVector(); - computeLiveOutSeed(&BB, Data.LiveOut[&BB]); + computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC); Data.LiveIn[&BB] = Data.LiveSet[&BB]; Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]); Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]); @@ -3368,7 +3397,7 @@ } 
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, - StatepointLiveSetTy &Out) { + StatepointLiveSetTy &Out, GCStrategy *GC) { BasicBlock *BB = Inst->getParent(); // Note: The copy is intentional and required @@ -3379,8 +3408,8 @@ // call result is not live (normal), nor are it's arguments // (unless they're used again later). This adjustment is // specifically what we need to relocate - computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), - LiveOut); + computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut, + GC); LiveOut.remove(Inst); Out.insert(LiveOut.begin(), LiveOut.end()); } @@ -3388,9 +3417,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &Info, - PointerToBaseTy &PointerToBase) { + PointerToBaseTy &PointerToBase, + GCStrategy *GC) { StatepointLiveSetTy Updated; - findLiveSetAtInst(Call, RevisedLivenessData, Updated); + findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC); // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this.