Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -73,10 +73,6 @@ "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); -static cl::opt - DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), - cl::desc("Disable GC optimizations in CodeGenPrepare")); - static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -186,7 +182,6 @@ const SmallVectorImpl &Exts, unsigned CreatedInstCost); bool splitBranchCondition(Function &F); - bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -295,16 +290,6 @@ EverMadeChange |= MadeChange; } - if (!DisableGCOpts) { - SmallVector Statepoints; - for (BasicBlock &BB : F) - for (Instruction &I : BB) - if (isStatepoint(I)) - Statepoints.push_back(&I); - for (auto &I : Statepoints) - EverMadeChange |= simplifyOffsetableRelocate(*I); - } - return EverMadeChange; } @@ -518,182 +503,6 @@ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -// Computes a map of base pointer relocation instructions to corresponding -// derived pointer relocation instructions given a vector of all relocate calls -static void computeBaseDerivedRelocateMap( - const SmallVectorImpl &AllRelocateCalls, - DenseMap> & - RelocateInstMap) { - // Collect information in two maps: one primarily for locating the base object - // while filling the second map; the second map is the final structure holding - // a mapping between Base and corresponding Derived relocate calls - DenseMap, IntrinsicInst *> RelocateIdxMap; - for (auto &U : AllRelocateCalls) { - GCRelocateOperands ThisRelocate(U); - IntrinsicInst *I = cast(U); - auto K = std::make_pair(ThisRelocate.getBasePtrIndex(), - ThisRelocate.getDerivedPtrIndex()); - RelocateIdxMap.insert(std::make_pair(K, I)); - } - for (auto &Item : RelocateIdxMap) { - std::pair Key = Item.first; - if (Key.first == Key.second) - // Base relocation: nothing to insert - continue; - - IntrinsicInst *I = Item.second; - auto BaseKey = std::make_pair(Key.first, Key.first); - - // We're iterating over RelocateIdxMap so we cannot modify it. - auto MaybeBase = RelocateIdxMap.find(BaseKey); - if (MaybeBase == RelocateIdxMap.end()) - // TODO: We might want to insert a new base object relocate and gep off - // that, if there are enough derived object relocates. - continue; - - RelocateInstMap[MaybeBase->second].push_back(I); - } -} - -// Accepts a GEP and extracts the operands into a vector provided they're all -// small integer constants -static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, - SmallVectorImpl &OffsetV) { - for (unsigned i = 1; i < GEP->getNumOperands(); i++) { - // Only accept small constant integer operands - auto Op = dyn_cast(GEP->getOperand(i)); - if (!Op || Op->getZExtValue() > 20) - return false; - } - - for (unsigned i = 1; i < GEP->getNumOperands(); i++) - OffsetV.push_back(GEP->getOperand(i)); - return true; -} - -// Takes a RelocatedBase (base pointer relocation instruction) and Targets to -// replace, computes a replacement, and affects it. 
-static bool -simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, - const SmallVectorImpl &Targets) { - bool MadeChange = false; - for (auto &ToReplace : Targets) { - GCRelocateOperands MasterRelocate(RelocatedBase); - GCRelocateOperands ThisRelocate(ToReplace); - - assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() && - "Not relocating a derived object of the original base object"); - if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) { - // A duplicate relocate call. TODO: coalesce duplicates. - continue; - } - - Value *Base = ThisRelocate.getBasePtr(); - auto Derived = dyn_cast(ThisRelocate.getDerivedPtr()); - if (!Derived || Derived->getPointerOperand() != Base) - continue; - - SmallVector OffsetV; - if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) - continue; - - // Create a Builder and replace the target callsite with a gep - assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); - - // Insert after RelocatedBase - IRBuilder<> Builder(RelocatedBase->getNextNode()); - Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); - - // If gc_relocate does not match the actual type, cast it to the right type. - // In theory, there must be a bitcast after gc_relocate if the type does not - // match, and we should reuse it to get the derived pointer. But it could be - // cases like this: - // bb1: - // ... - // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) - // br label %merge - // - // bb2: - // ... - // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) - // br label %merge - // - // merge: - // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] - // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* - // - // In this case, we can not find the bitcast any more. So we insert a new bitcast - // no matter there is already one or not. In this way, we can handle all cases, and - // the extra bitcast should be optimized away in later passes. - Instruction *ActualRelocatedBase = RelocatedBase; - if (RelocatedBase->getType() != Base->getType()) { - ActualRelocatedBase = - cast(Builder.CreateBitCast(RelocatedBase, Base->getType())); - } - Value *Replacement = Builder.CreateGEP( - Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); - Instruction *ReplacementInst = cast(Replacement); - Replacement->takeName(ToReplace); - // If the newly generated derived pointer's type does not match the original derived - // pointer's type, cast the new derived pointer to match it. Same reasoning as above. - Instruction *ActualReplacement = ReplacementInst; - if (ReplacementInst->getType() != ToReplace->getType()) { - ActualReplacement = - cast(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); - } - ToReplace->replaceAllUsesWith(ActualReplacement); - ToReplace->eraseFromParent(); - - MadeChange = true; - } - return MadeChange; -} - -// Turns this: -// -// %base = ... -// %ptr = gep %base + 15 -// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) -// %base' = relocate(%tok, i32 4, i32 4) -// %ptr' = relocate(%tok, i32 4, i32 5) -// %val = load %ptr' -// -// into this: -// -// %base = ... 
-// %ptr = gep %base + 15
-// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
-// %base' = gc.relocate(%tok, i32 4, i32 4)
-// %ptr' = gep %base' + 15
-// %val = load %ptr'
-bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
-  bool MadeChange = false;
-  SmallVector AllRelocateCalls;
-
-  for (auto *U : I.users())
-    if (isGCRelocate(dyn_cast(U)))
-      // Collect all the relocate calls associated with a statepoint
-      AllRelocateCalls.push_back(U);
-
-  // We need atleast one base pointer relocation + one derived pointer
-  // relocation to mangle
-  if (AllRelocateCalls.size() < 2)
-    return false;
-
-  // RelocateInstMap is a mapping from the base relocate instruction to the
-  // corresponding derived relocate instructions
-  DenseMap> RelocateInstMap;
-  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
-  if (RelocateInstMap.empty())
-    return false;
-
-  for (auto &Item : RelocateInstMap)
-    // Item.first is the RelocatedBase to offset against
-    // Item.second is the vector of Targets to replace
-    MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
-  return MadeChange;
-}
-
 /// SinkCast - Sink the specified cast instruction into its user blocks
 static bool SinkCast(CastInst *CI) {
   BasicBlock *DefBB = CI->getParent();
Index: lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
===================================================================
--- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/DenseSet.h"
@@ -56,6 +57,12 @@
 static cl::opt PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                  cl::init(false));
+// Cost threshold measuring when it is profitable to rematerialize a value
+// instead of relocating it
+static cl::opt
+RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
+                           cl::init(6));
+
 #ifdef XDEBUG
 static bool ClobberNonLive = true;
 #else
@@ -78,6 +85,7 @@
     // We add and rewrite a bunch of instructions, but don't really do much
     // else. We could in theory preserve a lot more analyses here.
     AU.addRequired();
+    AU.addRequired();
   }
 };
 } // namespace
@@ -123,6 +131,7 @@
 // types, then update all the second type to the first type
 typedef DenseMap DefiningValueMapTy;
 typedef DenseSet StatepointLiveSetTy;
+typedef DenseMap RematerializedValueMapTy;
 struct PartiallyConstructedSafepointRecord {
   /// The set of values known to be live accross this safepoint
@@ -138,6 +147,11 @@
   /// Instruction to which exceptional gc relocates are attached
   /// Makes it easier to iterate through them during relocationViaAlloca.
   Instruction *UnwindToken;
+
+  /// Record live values that we rematerialized instead of relocating.
+  /// They are not included in the 'liveset' field.
+  /// Maps a rematerialized copy to its original value.
+  RematerializedValueMapTy RematerializedValues;
 };
 }
@@ -1389,6 +1403,30 @@
   }
 }
+// Helper function for the "relocationViaAlloca". Similar to the
+// "insertRelocationStores" but works for rematerialized values.
+static void
+insertRematerializationStores(
+  RematerializedValueMapTy RematerializedValues,
+  DenseMap &AllocaMap,
+  DenseSet &VisitedLiveValues) {
+
+  for (auto RematerializedValuePair: RematerializedValues) {
+    Value *RematerializedValue = RematerializedValuePair.first;
+    Value *OriginalValue = RematerializedValuePair.second;
+
+    assert(AllocaMap.count(OriginalValue));
+    Value *Alloca = AllocaMap[OriginalValue];
+
+    StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
+    Store->insertAfter(cast(RematerializedValue));
+
+#ifndef NDEBUG
+    VisitedLiveValues.insert(OriginalValue);
+#endif
+  }
+}
+
 /// do all the relocation update via allocas and mem2reg
 static void relocationViaAlloca(
     Function &F, DominatorTree &DT, ArrayRef live,
@@ -1406,17 +1444,38 @@
   // TODO-PERF: change data structures, reserve
   DenseMap allocaMap;
   SmallVector PromotableAllocas;
+  // Used later to check that we have enough allocas to store all values
+  std::size_t NumRematerializedValues = 0;
   PromotableAllocas.reserve(live.size());
 
+  // Emit alloca for "LiveValue" and record it in "allocaMap" and
+  // "PromotableAllocas"
+  auto emitAllocaFor = [&](Value *LiveValue) {
+    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+                                        F.getEntryBlock().getFirstNonPHI());
+    allocaMap[LiveValue] = Alloca;
+    PromotableAllocas.push_back(Alloca);
+  };
+
   // emit alloca for each live gc pointer
   for (unsigned i = 0; i < live.size(); i++) {
-    Value *liveValue = live[i];
-    AllocaInst *alloca = new AllocaInst(liveValue->getType(), "",
-                                        F.getEntryBlock().getFirstNonPHI());
-    allocaMap[liveValue] = alloca;
-    PromotableAllocas.push_back(alloca);
+    emitAllocaFor(live[i]);
   }
 
+  // emit allocas for rematerialized values
+  for (size_t i = 0; i < records.size(); i++) {
+    const struct PartiallyConstructedSafepointRecord &Info = records[i];
+
+    for (auto RematerializedValuePair: Info.RematerializedValues) {
+      Value *OriginalValue = RematerializedValuePair.second;
+      if (allocaMap.count(OriginalValue) != 0) {
+        continue;
+      }
+      emitAllocaFor(OriginalValue);
+      ++NumRematerializedValues;
+    }
+  }
+
   // The next two loops are part of the same conceptual operation. We need to
   // insert a store to the alloca after the original def and at each
   // redefinition. We need to insert a load before each use. These are split
@@ -1444,6 +1503,10 @@
                            visitedLiveValues);
     }
 
+    // Do a similar thing for rematerialized values
+    insertRematerializationStores(info.RematerializedValues, allocaMap,
+                                  visitedLiveValues);
+
     if (ClobberNonLive) {
       // As a debuging aid, pretend that an unrelocated pointer becomes null at
       // the gc.statepoint. This will turn some subtle GC problems into
@@ -1548,7 +1611,7 @@
     }
   }
-  assert(PromotableAllocas.size() == live.size() &&
+  assert(PromotableAllocas.size() == live.size() + NumRematerializedValues &&
          "we must have the same allocas with lives");
   if (!PromotableAllocas.empty()) {
     // apply mem2reg to promote alloca to SSA
@@ -1732,6 +1795,191 @@
   PromoteMemToReg(Allocas, DT);
 }
+// Helper function for the "rematerializeLiveValues". It walks the use chain
+// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
+// values are visited (currently GEPs and casts). Returns true if it
+// successfully reached "BaseValue" and false otherwise.
+// Fills the "ChainToBase" array with all visited values. "BaseValue" is not
+// recorded.
+static bool findRematerializableChainToBasePointer(
+  SmallVectorImpl &ChainToBase,
+  Value *CurrentValue, Value *BaseValue) {
+
+  // We have found a base value
+  if (CurrentValue == BaseValue) {
+    return true;
+  }
+
+  if (GetElementPtrInst *GEP = dyn_cast(CurrentValue)) {
+    ChainToBase.push_back(GEP);
+    return findRematerializableChainToBasePointer(ChainToBase,
+                                                  GEP->getPointerOperand(),
+                                                  BaseValue);
+  }
+
+  if (CastInst *CI = dyn_cast(CurrentValue)) {
+    Value *Def = CI->stripPointerCasts();
+
+    // These two checks are basically similar. The first one is here for
+    // consistency with the findBasePointers logic.
+    assert(!isa(Def) && "not a pointer cast found");
+    if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
+      return false;
+
+    ChainToBase.push_back(CI);
+    return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+  }
+
+  // Not a supported instruction in the chain
+  return false;
+}
+
+// Helper function for the "rematerializeLiveValues". Compute the cost of the
+// use chain we are going to rematerialize.
+static unsigned
+chainToBasePointerCost(SmallVectorImpl &Chain,
+                       TargetTransformInfo *TTI) {
+  assert(TTI);
+
+  unsigned Cost = 0;
+
+  for (Instruction *Instr: Chain) {
+    assert(isa(Instr) || isa(Instr));
+
+    if (CastInst *CI = dyn_cast(Instr)) {
+      assert(CI->isNoopCast(CI->getModule()->getDataLayout()));
+
+      Type *SrcTy = CI->getOperand(0)->getType();
+      Cost += TTI->getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+      continue;
+    }
+
+    if (GetElementPtrInst *GEP = dyn_cast(Instr)) {
+      // Cost of the address calculation
+      Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
+      bool IsComplex = !GEP->hasAllConstantIndices();
+      Cost += TTI->getAddressComputationCost(ValTy, IsComplex);
+
+      // And cost of the GEP itself
+      // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
+      // allowed for external usage)
+      if (IsComplex)
+        Cost += 2;
+
+      continue;
+    }
+  }
+
+  // Avoid rematerializing very long instruction chains to avoid code size
+  // problems
+  if (Chain.size() > 10)
+    Cost += 10;
+
+  return Cost;
+}
+
+// From the statepoint liveset pick values that are cheaper to recompute than
+// to relocate. Remove these values from the liveset, rematerialize them after
+// the statepoint and record them in the "Info" structure. Note that, similar
+// to relocated values, we don't do any user adjustments here.
+static void rematerializeLiveValues(CallSite CS,
+                                    PartiallyConstructedSafepointRecord &Info,
+                                    TargetTransformInfo *TTI) {
+  assert(TTI);
+
+  // Temporarily store the live set in a vector to not worry about iterator
+  // invalidation in the following loop.
+  SmallVector LiveSet;
+  LiveSet.insert(LiveSet.begin(), Info.liveset.begin(), Info.liveset.end());
+
+  for (Value *LiveValue: LiveSet) {
+    // For each live pointer find its defining chain
+    SmallVector ChainToBase;
+    assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+    bool FoundChain =
+      findRematerializableChainToBasePointer(ChainToBase,
+                                             LiveValue,
+                                             Info.PointerToBase[LiveValue]);
+    // Nothing to do
+    if (!FoundChain || ChainToBase.size() == 0)
+      continue;
+
+    // Compute the cost of this chain
+    unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
+    // For invokes we need to rematerialize each chain twice - for the normal
+    // and for the unwind basic blocks. Model this by multiplying the cost by
+    // two.
+    if (CS.isInvoke()) {
+      Cost *= 2;
+    }
+    // If it's too expensive - skip it
+    if (Cost >= RematerializationThreshold)
+      continue;
+
+    // Remove the value from the live set
+    Info.liveset.erase(LiveValue);
+
+    // Clone instructions and record them inside the "Info" structure
+
+    // Walk backwards to visit top-most instructions first
+    std::reverse(ChainToBase.begin(), ChainToBase.end());
+
+    // Utility function which clones all instructions from "ChainToBase"
+    // and inserts them before "InsertBefore". Returns the rematerialized
+    // value which should be used after the statepoint.
+    auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
+      Instruction *LastClonedValue = nullptr;
+      Instruction *LastValue = nullptr;
+      for (Instruction *Instr: ChainToBase) {
+        // Only GEPs and casts are supported as we need to be careful to not
+        // introduce any new uses of pointers not in the liveset.
+        assert(isa(Instr) || isa(Instr));
+
+        Instruction *ClonedValue = Instr->clone();
+        ClonedValue->insertBefore(InsertBefore);
+        ClonedValue->setName(Instr->getName() + ".remat");
+
+        // If it is not the first instruction in the chain then it uses the
+        // previously cloned value. We should update it to use the cloned
+        // value.
+        if (LastClonedValue) {
+          assert(LastValue);
+          ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
+        }
+
+        LastClonedValue = ClonedValue;
+        LastValue = Instr;
+      }
+      assert(LastClonedValue);
+      return LastClonedValue;
+    };
+
+    // Different cases for calls and invokes. For invokes we need to clone
+    // instructions both on the normal and the unwind path.
+    if (CS.isCall()) {
+      Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+      assert(InsertBefore);
+      Instruction *RematerializedValue = rematerializeChain(InsertBefore);
+      Info.RematerializedValues[RematerializedValue] = LiveValue;
+    }
+    else {
+      assert(CS.isInvoke() && "should be call or invoke");
+
+      Instruction *NormalInsertBefore = cast(CS.getInstruction())->
+        getNormalDest()->getFirstInsertionPt();
+      Instruction *UnwindInsertBefore = cast(CS.getInstruction())->
+        getUnwindDest()->getFirstInsertionPt();
+
+      Instruction *NormalRematerializedValue =
+        rematerializeChain(NormalInsertBefore);
+      Instruction *UnwindRematerializedValue =
        rematerializeChain(UnwindInsertBefore);
+
+      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
+      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+    }
+  }
+
+}
+
 static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
                               SmallVectorImpl &toUpdate) {
 #ifndef NDEBUG
@@ -1867,6 +2115,19 @@
   }
   holders.clear();
+  // In order to reduce the live set of a statepoint we might choose to
+  // rematerialize some values instead of relocating them. This is purely an
+  // optimization and does not influence correctness.
+  TargetTransformInfo *TTI =
+    &(P->getAnalysis()).getTTI(F);
+
+  for (size_t i = 0; i < records.size(); i++) {
+    struct PartiallyConstructedSafepointRecord &info = records[i];
+    CallSite &CS = toUpdate[i];
+
+    rematerializeLiveValues(CS, info, TTI);
+  }
+
   // Now run through and replace the existing statepoints with new ones with
   // the live variables listed. We do not yet update uses of the values being
We have references to live variables that need to Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll =================================================================== --- test/Transforms/CodeGenPrepare/statepoint-relocate.ll +++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll @@ -1,88 +0,0 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s - -target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - -declare zeroext i1 @return_i1() - -define i32 @test_sor_basic(i32* %base) gc "statepoint-example" { -; CHECK: getelementptr i32, i32* %base, i32 15 -; CHECK: getelementptr i32, i32* %base-new, i32 15 -entry: - %ptr = getelementptr i32, i32* %base, i32 15 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr) - %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -define i32 @test_sor_two_derived(i32* %base) gc "statepoint-example" { -; CHECK: getelementptr i32, i32* %base, i32 15 -; CHECK: getelementptr i32, i32* %base, i32 12 -; CHECK: getelementptr i32, i32* %base-new, i32 12 -; CHECK: getelementptr i32, i32* %base-new, i32 15 -entry: - %ptr = getelementptr i32, i32* %base, i32 15 - %ptr2 = getelementptr i32, i32* %base, i32 12 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2) - %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -define i32 @test_sor_ooo(i32* %base) gc "statepoint-example" { -; CHECK: getelementptr i32, i32* %base, i32 15 -; CHECK: getelementptr i32, i32* %base-new, i32 15 -entry: - %ptr = getelementptr i32, i32* %base, i32 15 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -define i32 @test_sor_gep_smallint([3 x i32]* %base) gc "statepoint-example" { -; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2 -; CHECK: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 2 -entry: - %ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr) - %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -define i32 @test_sor_gep_largeint([3 x i32]* %base) gc "statepoint-example" { -; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21 -; CHECK-NOT: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 21 -entry: - %ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr) - %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -define i32 @test_sor_noop(i32* %base) gc "statepoint-example" { -; CHECK: getelementptr i32, i32* %base, i32 15 -; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) -; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9) -entry: - %ptr = getelementptr i32, i32* %base, i32 15 - %ptr2 = getelementptr i32, i32* %base, i32 12 - %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2) - %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8) - %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9) - %ret = load i32, i32* %ptr-new - ret i32 %ret -} - -declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) -declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32) -declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32) Index: test/Transforms/RewriteStatepointsForGC/basics.ll =================================================================== --- test/Transforms/RewriteStatepointsForGC/basics.ll +++ test/Transforms/RewriteStatepointsForGC/basics.ll @@ -1,5 +1,5 @@ ; This is a collection of really basic tests for gc.statepoint rewriting. 
-; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s +; RUN: opt %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S | FileCheck %s declare void @foo() Index: test/Transforms/RewriteStatepointsForGC/liveness-basics.ll =================================================================== --- test/Transforms/RewriteStatepointsForGC/liveness-basics.ll +++ test/Transforms/RewriteStatepointsForGC/liveness-basics.ll @@ -1,6 +1,6 @@ ; A collection of liveness test cases to ensure we're reporting the ; correct live values at statepoints -; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s +; RUN: opt -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S < %s | FileCheck %s ; Tests to make sure we consider %obj live in both the taken and untaken Index: test/Transforms/RewriteStatepointsForGC/relocation.ll =================================================================== --- test/Transforms/RewriteStatepointsForGC/relocation.ll +++ test/Transforms/RewriteStatepointsForGC/relocation.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s +; RUN: opt %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S 2>&1 | FileCheck %s declare void @foo() Index: test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll =================================================================== --- test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll +++ test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll @@ -0,0 +1,197 @@ +; RUN: opt %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s + +declare void @use_obj16(i16 addrspace(1)*) +declare void @use_obj32(i32 addrspace(1)*) +declare void @use_obj64(i64 addrspace(1)*) +declare void @do_safepoint() + +define void @"test_gep_const"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_gep_const +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + call void @use_obj32(i32 addrspace(1)* %base) + call void @use_obj32(i32 addrspace(1)* %ptr) + ret void +} + +define void @"test_gep_idx"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" { +; CHECK-LABEL: test_gep_idx +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx + ; CHECK: getelementptr + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + call void @use_obj32(i32 addrspace(1)* %base) + call void @use_obj32(i32 addrspace(1)* %ptr) + ret void +} + +define void @"test_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_bitcast +entry: + %ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)* + ; CHECK: bitcast + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: bitcast + call void @use_obj32(i32 addrspace(1)* %base) + call void @use_obj64(i64 addrspace(1)* %ptr) + ret void +} + +define void @"test_bitcast_gep"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_bitcast_gep +entry: + %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)* + ; CHECK: bitcast + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + call void @use_obj32(i32 addrspace(1)* %base) + call void @use_obj64(i64 addrspace(1)* %ptr.cast) + ret void +} + +define void @"test_intersecting_chains"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" { +; CHECK-LABEL: test_intersecting_chains +entry: + %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)* + ; CHECK: bitcast + %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)* + ; CHECK: bitcast + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: getelementptr + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + call void @use_obj64(i64 addrspace(1)* %ptr.cast) + call void @use_obj16(i16 addrspace(1)* %ptr.cast2) + ret void +} + +define void @"test_cost_threshold"(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" { +; CHECK-LABEL: test_cost_threshold +entry: + %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 %idx1 + ; CHECK: getelementptr + %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 %idx2 + ; CHECK: getelementptr + %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 %idx3 + ; CHECK: getelementptr + %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)* + ; CHECK: bitcast + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: gc.relocate + ; CHECK: bitcast + call void @use_obj64(i64 addrspace(1)* %ptr.cast) + ret void +} + +define void @"test_two_derived"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_two_derived +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15 + %ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12 + ; CHECK: getelementptr + ; CHECK: getelementptr + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: getelementptr + call void @use_obj32(i32 addrspace(1)* %ptr) + call void @use_obj32(i32 addrspace(1)* %ptr2) + ret void +} + +define void @"test_gep_smallint_array"([3 x i32] addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_gep_smallint_array +entry: + %ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2 + ; CHECK: getelementptr + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + call void @use_obj32(i32 addrspace(1)* %ptr) + ret void +} + +declare i32 @fake_personality_function() + +define void @"test_invoke"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_invoke +entry: + %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)* + ; CHECK: bitcast + %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)* + ; CHECK: bitcast + %sp = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + to label %normal unwind label %exception + +normal: + ; CHECK-LABEL: normal: + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + call void @use_obj64(i64 addrspace(1)* %ptr.cast) + call void @use_obj16(i16 addrspace(1)* %ptr.cast2) + ret void + +exception: + ; CHECK-LABEL: exception: + %landing_pad4 = landingpad { i8*, i32 } personality i32 ()* @fake_personality_function + cleanup + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: bitcast + call void @use_obj64(i64 addrspace(1)* %ptr.cast) + call void @use_obj16(i16 addrspace(1)* %ptr.cast2) + ret void +} + +define void @"test_loop"(i32 addrspace(1)* %base) gc "statepoint-example" { +; CHECK-LABEL: test_loop +entry: + %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15 + ; CHECK: getelementptr + br label %loop + +loop: + ; CHECK: phi i32 addrspace(1)* [ %ptr.gep, %entry ], [ %ptr.gep.remat, %loop ] + ; CHECK: phi i32 addrspace(1)* [ %base, %entry ], [ %base.relocated.casted, %loop ] + call void @use_obj32(i32 addrspace(1)* %ptr.gep) + %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + ; CHECK: gc.relocate + ; CHECK: bitcast + ; CHECK: getelementptr + br label %loop +} + +declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
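
Note (reviewer aid, not part of the patch): a minimal sketch of the transformation the new rematerialization path performs, written in the style of the tests above and reusing the @do_safepoint/@use_obj32 declarations from rematerialize-derived-pointers.ll. The relocate operand indices and the exact value names below are illustrative assumptions rather than verbatim pass output; only the ".remat" suffix and the relocated-base naming mirror what the pass and the loop test check for.

; Before -rewrite-statepoints-for-gc: %ptr is a cheap GEP off %base and is
; live across the safepoint, so it would normally have to be relocated.
define void @remat_sketch(i32 addrspace(1)* %base) gc "statepoint-example" {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
  %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
  call void @use_obj32(i32 addrspace(1)* %ptr)
  ret void
}
; After the pass (sketch): only %base is relocated; %ptr is dropped from the
; live set and recomputed from the relocated base, because its chain (a single
; GEP with constant indices) costs less than spp-rematerialization-threshold:
;   %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
;   %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
;   %ptr.remat = getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 15
;   call void @use_obj32(i32 addrspace(1)* %ptr.remat)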