diff --git a/llvm/docs/Statepoints.rst b/llvm/docs/Statepoints.rst --- a/llvm/docs/Statepoints.rst +++ b/llvm/docs/Statepoints.rst @@ -556,10 +556,6 @@ might contain a safepoint poll with a ``gc.statepoint`` and associated full relocation sequence, including all required ``gc.relocates``. -Note that by default, this pass only runs for the "statepoint-example" or -"core-clr" gc strategies. You will need to add your custom strategy to this -list or use one of the predefined ones. - As an example, given this code: .. code-block:: llvm @@ -584,7 +580,10 @@ In the above examples, the addrspace(1) marker on the pointers is the mechanism that the ``statepoint-example`` GC strategy uses to distinguish references from non references. The pass assumes that all addrspace(1) pointers are non-integral -pointer types. Address space 1 is not globally reserved for this purpose. +pointer types. Address space 1 is not globally reserved for this purpose. This +is controlled via ``GCStrategy::isGCManagedPointer``, which checks for +addrspace(1) in the ``statepoint-example`` and ``coreclr`` strategies; custom +strategies don't have to follow this convention. 
This pass can be used an utility function by a language frontend that doesn't want to manually reason about liveness, base pointers, or relocation when diff --git a/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h b/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h --- a/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h +++ b/llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h @@ -25,12 +25,13 @@ class Module; class TargetTransformInfo; class TargetLibraryInfo; +class GCStrategy; struct RewriteStatepointsForGC : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool runOnFunction(Function &F, DominatorTree &, TargetTransformInfo &, - const TargetLibraryInfo &); + const TargetLibraryInfo &, GCStrategy *); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GCStrategy.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -69,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +114,8 @@ static cl::opt RematDerivedAtUses("rs4gc-remat-derived-at-uses", cl::Hidden, cl::init(true)); +using GCStrategyCache = std::map>; + /// The IR fed into RewriteStatepointsForGC may have had attributes and /// metadata implying dereferenceability that are no longer valid/correct after /// RewriteStatepointsForGC has run. This is because semantically, after @@ -123,28 +127,36 @@ /// noalias objects. /// Apart from attributes and metadata, we also remove instructions that imply /// constant physical memory: llvm.invariant.start. 
-static void stripNonValidData(Module &M); +static void stripNonValidData(Module &M, GCStrategyCache &strategies); + +// Find the GC strategy for a function, or null if it doesn't have one. +// Since this may involve creating the strategy, they're cached and stored in +// the provided map. +static GCStrategy *findGCStrategy(Function &F, GCStrategyCache &cache); -static bool shouldRewriteStatepointsIn(Function &F); +static bool shouldRewriteStatepointsIn(Function &F, GCStrategy *GC); PreservedAnalyses RewriteStatepointsForGC::run(Module &M, ModuleAnalysisManager &AM) { bool Changed = false; auto &FAM = AM.getResult(M).getManager(); + GCStrategyCache strategies; for (Function &F : M) { // Nothing to do for declarations. if (F.isDeclaration() || F.empty()) continue; + GCStrategy *GC = findGCStrategy(F, strategies); + // Policy choice says not to rewrite - the most common reason is that we're // compiling code without a GCStrategy. - if (!shouldRewriteStatepointsIn(F)) + if (!shouldRewriteStatepointsIn(F, GC)) continue; auto &DT = FAM.getResult(F); auto &TTI = FAM.getResult(F); auto &TLI = FAM.getResult(F); - Changed |= runOnFunction(F, DT, TTI, TLI); + Changed |= runOnFunction(F, DT, TTI, TLI, GC); } if (!Changed) return PreservedAnalyses::all(); @@ -152,7 +164,7 @@ // stripNonValidData asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidData(M); + stripNonValidData(M, strategies); PreservedAnalyses PA; PA.preserve(); @@ -175,14 +187,17 @@ bool runOnModule(Module &M) override { bool Changed = false; + GCStrategyCache strategies; for (Function &F : M) { // Nothing to do for declarations. if (F.isDeclaration() || F.empty()) continue; + GCStrategy *GC = findGCStrategy(F, strategies); + // Policy choice says not to rewrite - the most common reason is that // we're compiling code without a GCStrategy. 
- if (!shouldRewriteStatepointsIn(F)) + if (!shouldRewriteStatepointsIn(F, GC)) continue; TargetTransformInfo &TTI = @@ -191,7 +206,7 @@ getAnalysis().getTLI(F); auto &DT = getAnalysis(F).getDomTree(); - Changed |= Impl.runOnFunction(F, DT, TTI, TLI); + Changed |= Impl.runOnFunction(F, DT, TTI, TLI, GC); } if (!Changed) @@ -200,7 +215,7 @@ // stripNonValidData asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidData(M); + stripNonValidData(M, strategies); return true; } @@ -311,37 +326,36 @@ /// Compute the live-in set for every basic block in the function static void computeLiveInValues(DominatorTree &DT, Function &F, - GCPtrLivenessData &Data); + GCPtrLivenessData &Data, GCStrategy *GC); /// Given results from the dataflow liveness computation, find the set of live /// Values at a particular instruction. static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, - StatepointLiveSetTy &out); + StatepointLiveSetTy &out, GCStrategy *GC); -// TODO: Once we can get to the GCStrategy, this becomes -// std::optional isGCManagedPointer(const Type *Ty) const override { +static bool isGCPointerType(Type *T, GCStrategy *GC) { + assert(GC && "GC Strategy for isGCPointerType cannot be null"); -static bool isGCPointerType(Type *T) { - if (auto *PT = dyn_cast(T)) - // For the sake of this example GC, we arbitrarily pick addrspace(1) as our - // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. - return PT->getAddressSpace() == 1; - return false; + if (!isa(T)) + return false; + + if (auto IsManaged = GC->isGCManagedPointer(T)) + return *IsManaged; + return true; // conservative - same as StatepointLowering } // Return true if this type is one which a) is a gc pointer or contains a GC // pointer and b) is of a type this code expects to encounter as a live value. 
// (The insertion code will assert that a type which matches (a) and not (b) // is not encountered.) -static bool isHandledGCPointerType(Type *T) { +static bool isHandledGCPointerType(Type *T, GCStrategy *GC) { // We fully support gc pointers - if (isGCPointerType(T)) + if (isGCPointerType(T, GC)) return true; // We partially support vectors of gc pointers. The code will assert if it // can't handle something. if (auto VT = dyn_cast(T)) - if (isGCPointerType(VT->getElementType())) + if (isGCPointerType(VT->getElementType(), GC)) return true; return false; } @@ -349,23 +363,24 @@ #ifndef NDEBUG /// Returns true if this type contains a gc pointer whether we know how to /// handle that type or not. -static bool containsGCPtrType(Type *Ty) { - if (isGCPointerType(Ty)) +static bool containsGCPtrType(Type *Ty, GCStrategy *GC) { + if (isGCPointerType(Ty, GC)) return true; if (VectorType *VT = dyn_cast(Ty)) - return isGCPointerType(VT->getScalarType()); + return isGCPointerType(VT->getScalarType(), GC); if (ArrayType *AT = dyn_cast(Ty)) - return containsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType(), GC); if (StructType *ST = dyn_cast(Ty)) - return llvm::any_of(ST->elements(), containsGCPtrType); + return llvm::any_of(ST->elements(), + [GC](Type *Ty) { return containsGCPtrType(Ty, GC); }); return false; } // Returns true if this is a type which a) is a gc pointer or contains a GC // pointer and b) is of a type which the code doesn't expect (i.e. first class // aggregates). Used to trip assertions. -static bool isUnhandledGCPointerType(Type *Ty) { - return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); +static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) { + return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC); } #endif @@ -382,9 +397,9 @@ // live. Values used by that instruction are considered live. 
static void analyzeParsePointLiveness( DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call, - PartiallyConstructedSafepointRecord &Result) { + PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) { StatepointLiveSetTy LiveSet; - findLiveSetAtInst(Call, OriginalLivenessData, LiveSet); + findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC); if (PrintLiveSet) { dbgs() << "Live Variables:\n"; @@ -1385,20 +1400,21 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &result, - PointerToBaseTy &PointerToBase); + PointerToBaseTy &PointerToBase, + GCStrategy *GC); static void recomputeLiveInValues( Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records, - PointerToBaseTy &PointerToBase) { + PointerToBaseTy &PointerToBase, GCStrategy *GC) { // TODO-PERF: reuse the original liveness, then simply run the dataflow // again. The old values are still live and will help it stabilize quickly. GCPtrLivenessData RevisedLivenessData; - computeLiveInValues(DT, F, RevisedLivenessData); + computeLiveInValues(DT, F, RevisedLivenessData, GC); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, - PointerToBase); + recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase, + GC); } } @@ -1522,7 +1538,7 @@ static void CreateGCRelocates(ArrayRef LiveVariables, ArrayRef BasePtrs, Instruction *StatepointToken, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, GCStrategy *GC) { if (LiveVariables.empty()) return; @@ -1542,8 +1558,8 @@ // towards a single unified pointer type anyways, we can just cast everything // to an i8* of the right address space. A bitcast is added later to convert // gc_relocate to the actual value's type. 
- auto getGCRelocateDecl = [&] (Type *Ty) { - assert(isHandledGCPointerType(Ty)); + auto getGCRelocateDecl = [&](Type *Ty) { + assert(isHandledGCPointerType(Ty, GC)); auto AS = Ty->getScalarType()->getPointerAddressSpace(); Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS); if (auto *VT = dyn_cast(Ty)) @@ -1668,7 +1684,8 @@ const SmallVectorImpl &LiveVariables, PartiallyConstructedSafepointRecord &Result, std::vector &Replacements, - const PointerToBaseTy &PointerToBase) { + const PointerToBaseTy &PointerToBase, + GCStrategy *GC) { assert(BasePtrs.size() == LiveVariables.size()); // Then go ahead and use the builder do actually do the inserts. We insert @@ -1901,7 +1918,7 @@ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); Result.UnwindToken = ExceptionalToken; - CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder); + CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC); // Generate gc relocates and returns for normal block BasicBlock *NormalDest = II->getNormalDest(); @@ -1947,7 +1964,7 @@ Result.StatepointToken = Token; // Second, create a gc.relocate for every live variable - CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder); + CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC); } // Replace an existing gc.statepoint with a new one and a set of gc.relocates @@ -1959,7 +1976,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, std::vector &Replacements, - const PointerToBaseTy &PointerToBase) { + const PointerToBaseTy &PointerToBase, GCStrategy *GC) { const auto &LiveSet = Result.LiveSet; // Convert to vector for efficient cross referencing. @@ -1976,7 +1993,7 @@ // Do the actual rewriting and delete the old statepoint makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements, - PointerToBase); + PointerToBase, GC); } // Helper function for the relocationViaAlloca. 
@@ -2277,12 +2294,13 @@ static void findLiveReferences( Function &F, DominatorTree &DT, ArrayRef toUpdate, - MutableArrayRef records) { + MutableArrayRef records, + GCStrategy *GC) { GCPtrLivenessData OriginalLivenessData; - computeLiveInValues(DT, F, OriginalLivenessData); + computeLiveInValues(DT, F, OriginalLivenessData, GC); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; - analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info); + analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC); } } @@ -2683,7 +2701,7 @@ TargetTransformInfo &TTI, SmallVectorImpl &ToUpdate, DefiningValueMapTy &DVCache, - IsKnownBaseMapTy &KnownBases) { + IsKnownBaseMapTy &KnownBases, GCStrategy *GC) { #ifndef NDEBUG // Validate the input std::set Uniqued; @@ -2718,9 +2736,9 @@ SmallVector DeoptValues; for (Value *Arg : GetDeoptBundleOperands(Call)) { - assert(!isUnhandledGCPointerType(Arg->getType()) && + assert(!isUnhandledGCPointerType(Arg->getType(), GC) && "support for FCA unimplemented"); - if (isHandledGCPointerType(Arg->getType())) + if (isHandledGCPointerType(Arg->getType(), GC)) DeoptValues.push_back(Arg); } @@ -2731,7 +2749,7 @@ // A) Identify all gc pointers which are statically live at the given call // site. - findLiveReferences(F, DT, ToUpdate, Records); + findLiveReferences(F, DT, ToUpdate, Records, GC); /// Global mapping from live pointers to a base-defining-value. PointerToBaseTy PointerToBase; @@ -2782,7 +2800,7 @@ // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue. - recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase); + recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC); if (PrintBasePointers) { errs() << "Base Pairs: (w/Relocation)\n"; @@ -2842,7 +2860,7 @@ // the old statepoint calls as we go.) 
for (size_t i = 0; i < Records.size(); i++) makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements, - PointerToBase); + PointerToBase, GC); ToUpdate.clear(); // prevent accident use of invalid calls. @@ -2899,7 +2917,7 @@ #ifndef NDEBUG // Validation check for (auto *Ptr : Live) - assert(isHandledGCPointerType(Ptr->getType()) && + assert(isHandledGCPointerType(Ptr->getType(), GC) && "must be a gc pointer type"); #endif @@ -3026,23 +3044,42 @@ } } +/// Looks up the GC strategy for a given function, returning null if the +/// function doesn't have a GC tag. The strategy is stored in the cache. +static GCStrategy *findGCStrategy(Function &F, GCStrategyCache &cache) { + if (!F.hasGC()) + return nullptr; + + if (cache.count(F.getGC()) == 0) { + cache[F.getGC()] = getGCStrategy(F.getGC()); + } + return cache.at(F.getGC()).get(); +} + /// Returns true if this function should be rewritten by this pass. The main /// point of this function is as an extension point for custom logic. -static bool shouldRewriteStatepointsIn(Function &F) { - // TODO: This should check the GCStrategy - if (F.hasGC()) { - const auto &FunctionGCName = F.getGC(); - const StringRef StatepointExampleName("statepoint-example"); - const StringRef CoreCLRName("coreclr"); - return (StatepointExampleName == FunctionGCName) || - (CoreCLRName == FunctionGCName); - } else +static bool shouldRewriteStatepointsIn(Function &F, GCStrategy *Strategy) { + if (!F.hasGC()) return false; + + assert(Strategy && "GC strategy is required by function, but was not found"); + return Strategy->useStatepoints(); +} + +/// Find whether or not at least one function in a given module should be +/// rewritten, as per the rewrite policy. 
+static bool anyFunctionNeedsRewrite(Module &M, GCStrategyCache &strategies) { + for (Function &F : M) { + GCStrategy *GC = findGCStrategy(F, strategies); + if (shouldRewriteStatepointsIn(F, GC)) + return true; + } + return false; } -static void stripNonValidData(Module &M) { +static void stripNonValidData(Module &M, GCStrategyCache &strategies) { #ifndef NDEBUG - assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); + assert(anyFunctionNeedsRewrite(M, strategies) && "precondition!"); #endif for (Function &F : M) @@ -3054,10 +3091,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + GCStrategy *GC) { assert(!F.isDeclaration() && !F.empty() && "need function body to rewrite statepoints in"); - assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); + assert(shouldRewriteStatepointsIn(F, GC) && "mismatch in rewrite decision"); auto NeedsRewrite = [&TLI](Instruction &I) { if (const auto *Call = dyn_cast(&I)) { @@ -3200,8 +3238,8 @@ MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases); if (!ParsePointNeeded.empty()) - MadeChange |= - insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases); + MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, + KnownBases, GC); return MadeChange; } @@ -3216,7 +3254,7 @@ /// the live-out set of the basic block static void computeLiveInValues(BasicBlock::reverse_iterator Begin, BasicBlock::reverse_iterator End, - SetVector &LiveTmp) { + SetVector &LiveTmp, GCStrategy *GC) { for (auto &I : make_range(Begin, End)) { // KILL/Def - Remove this definition from LiveIn LiveTmp.remove(&I); @@ -3228,9 +3266,9 @@ // USE - Add to the LiveIn set for this instruction for (Value *V : I.operands()) { - assert(!isUnhandledGCPointerType(V->getType()) && + assert(!isUnhandledGCPointerType(V->getType(), GC) && "support for FCA unimplemented"); - if 
(isHandledGCPointerType(V->getType()) && !isa(V)) { + if (isHandledGCPointerType(V->getType(), GC) && !isa(V)) { // The choice to exclude all things constant here is slightly subtle. // There are two independent reasons: // - We assume that things which are constant (from LLVM's definition) @@ -3247,7 +3285,8 @@ } } -static void computeLiveOutSeed(BasicBlock *BB, SetVector &LiveTmp) { +static void computeLiveOutSeed(BasicBlock *BB, SetVector &LiveTmp, + GCStrategy *GC) { for (BasicBlock *Succ : successors(BB)) { for (auto &I : *Succ) { PHINode *PN = dyn_cast(&I); @@ -3255,18 +3294,18 @@ break; Value *V = PN->getIncomingValueForBlock(BB); - assert(!isUnhandledGCPointerType(V->getType()) && + assert(!isUnhandledGCPointerType(V->getType(), GC) && "support for FCA unimplemented"); - if (isHandledGCPointerType(V->getType()) && !isa(V)) + if (isHandledGCPointerType(V->getType(), GC) && !isa(V)) LiveTmp.insert(V); } } } -static SetVector computeKillSet(BasicBlock *BB) { +static SetVector computeKillSet(BasicBlock *BB, GCStrategy *GC) { SetVector KillSet; for (Instruction &I : *BB) - if (isHandledGCPointerType(I.getType())) + if (isHandledGCPointerType(I.getType(), GC)) KillSet.insert(&I); return KillSet; } @@ -3301,14 +3340,14 @@ #endif static void computeLiveInValues(DominatorTree &DT, Function &F, - GCPtrLivenessData &Data) { + GCPtrLivenessData &Data, GCStrategy *GC) { SmallSetVector Worklist; // Seed the liveness for each individual block for (BasicBlock &BB : F) { - Data.KillSet[&BB] = computeKillSet(&BB); + Data.KillSet[&BB] = computeKillSet(&BB, GC); Data.LiveSet[&BB].clear(); - computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]); + computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC); #ifndef NDEBUG for (Value *Kill : Data.KillSet[&BB]) @@ -3316,7 +3355,7 @@ #endif Data.LiveOut[&BB] = SetVector(); - computeLiveOutSeed(&BB, Data.LiveOut[&BB]); + computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC); Data.LiveIn[&BB] = Data.LiveSet[&BB]; 
Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]); Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]); @@ -3368,7 +3407,7 @@ } static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, - StatepointLiveSetTy &Out) { + StatepointLiveSetTy &Out, GCStrategy *GC) { BasicBlock *BB = Inst->getParent(); // Note: The copy is intentional and required @@ -3379,8 +3418,8 @@ // call result is not live (normal), nor are it's arguments // (unless they're used again later). This adjustment is // specifically what we need to relocate - computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), - LiveOut); + computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut, + GC); LiveOut.remove(Inst); Out.insert(LiveOut.begin(), LiveOut.end()); } @@ -3388,9 +3427,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &Info, - PointerToBaseTy &PointerToBase) { + PointerToBaseTy &PointerToBase, + GCStrategy *GC) { StatepointLiveSetTy Updated; - findLiveSetAtInst(Call, RevisedLivenessData, Updated); + findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC); // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this.