Index: include/llvm/IR/Statepoint.h =================================================================== --- include/llvm/IR/Statepoint.h +++ include/llvm/IR/Statepoint.h @@ -194,12 +194,12 @@ /// The index into the associate statepoint's argument list /// which contains the base pointer of the pointer whose /// relocation this gc.relocate describes. - int basePtrIndex() { + unsigned basePtrIndex() { return cast(RelocateCS.getArgument(1))->getZExtValue(); } /// The index into the associate statepoint's argument list which /// contains the pointer whose relocation this gc.relocate describes. - int derivedPtrIndex() { + unsigned derivedPtrIndex() { return cast(RelocateCS.getArgument(2))->getZExtValue(); } Value *basePtr() { Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" @@ -72,6 +73,10 @@ "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); +static cl::opt + DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), + cl::desc("Disable GC optimizations in CodeGenPrepare")); + static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -183,6 +188,7 @@ const SmallVectorImpl &Exts, unsigned CreatedInst); bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -248,7 +254,7 @@ BasicBlock *BB = I++; bool ModifiedDTOnIteration = false; MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); - + // Restart BB iteration if the dominator tree of the Function was changed ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) @@ -298,6 +304,16 @@ EverMadeChange |= MadeChange; } + if (!DisableGCOpts) { + SmallVector Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } + if (ModifiedDT && DT) DT->recalculate(F); @@ -521,6 +537,144 @@ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl &AllRelocateCalls, + DenseMap> & + RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap, IntrinsicInst *> RelocateIdxMap; + for (auto &U : AllRelocateCalls) { + GCRelocateOperands ThisRelocate(U); + IntrinsicInst *I = cast(U); + auto K = std::make_pair(ThisRelocate.basePtrIndex(), + ThisRelocate.derivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, I)); + } + for (auto &Item : RelocateIdxMap) { + std::pair Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + IntrinsicInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + IntrinsicInst *Base = RelocateIdxMap[BaseKey]; + if (!Base) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + RelocateInstMap[Base].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and affects it. +static bool +simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, + const SmallVectorImpl &Targets) { + bool MadeChange = false; + for (auto &ToReplace : Targets) { + GCRelocateOperands MasterRelocate(RelocatedBase); + GCRelocateOperands ThisRelocate(ToReplace); + + assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates. + continue; + } + + Value *Base = ThisRelocate.basePtr(); + auto Derived = dyn_cast(ThisRelocate.derivedPtr()); + if (!Derived || Derived->getPointerOperand() != Base) + continue; + + SmallVector OffsetV; + if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) + continue; + + // Create a Builder and replace the target callsite with a gep + IRBuilder<> Builder(ToReplace); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); + Value *Replacement = + Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV)); + Instruction *ReplacementInst = cast(Replacement); + ReplacementInst->removeFromParent(); + ReplacementInst->insertAfter(RelocatedBase); + Replacement->takeName(ToReplace); + ToReplace->replaceAllUsesWith(Replacement); + ToReplace->eraseFromParent(); + + MadeChange = true; + } + return MadeChange; +} + +// Turns this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = relocate(%tok, i32 4, i32 4) +// %ptr' = relocate(%tok, i32 4, i32 5) +// %val = load %ptr' +// +// into this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = gc.relocate(%tok, i32 4, i32 4) +// %ptr' = gep %base' + 15 +// %val = load %ptr' +bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { + bool MadeChange = false; + SmallVector AllRelocateCalls; + + for (auto *U : I.users()) + if (isGCRelocate(dyn_cast(U))) + // Collect all the relocate calls associated with a statepoint + AllRelocateCalls.push_back(U); + + // We need atleast one base pointer relocation + one derived pointer + // relocation to mangle + if (AllRelocateCalls.size() < 2) + return false; + + // RelocateInstMap is a mapping from the base relocate instruction to the + // corresponding derived relocate instructions + DenseMap> RelocateInstMap; + computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); + if (RelocateInstMap.empty()) + return false; + + for (auto &Item : RelocateInstMap) + // Item.first is the RelocatedBase to offset against + // Item.second is the vector of Targets to replace + MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); + return MadeChange; +} + /// SinkCast - Sink the specified cast instruction into its user blocks static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll @@ -0,0 +1,88 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare zeroext i1 @return_i1() + +define i32 @test_sor_basic(i32* %base) { +; CHECK: getelementptr i32* %base, i32 15 +; CHECK: getelementptr i32* %base-new, i32 15 +entry: + %ptr = getelementptr i32* %base, i32 15 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr) + %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %ret = load i32* %ptr-new + ret i32 %ret +} + +define i32 @test_sor_two_derived(i32* %base) { +; CHECK: getelementptr i32* %base, i32 15 +; CHECK: getelementptr i32* %base, i32 12 +; CHECK: getelementptr i32* %base-new, i32 15 +; CHECK: getelementptr i32* %base-new, i32 12 +entry: + %ptr = getelementptr i32* %base, i32 15 + %ptr2 = getelementptr i32* %base, i32 12 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2) + %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6) + %ret = load i32* %ptr-new + ret i32 %ret +} + +define i32 @test_sor_ooo(i32* %base) { +; CHECK: getelementptr i32* %base, i32 15 +; CHECK: getelementptr i32* %base-new, i32 15 +entry: + %ptr = getelementptr i32* %base, i32 15 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4) + %ret = load i32* %ptr-new + ret i32 %ret +} + +define i32 @test_sor_gep_smallint([3 x i32]* %base) { +; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 2 +; CHECK: getelementptr [3 x i32]* %base-new, i32 0, i32 2 +entry: + %ptr = getelementptr [3 x i32]* %base, i32 0, i32 2 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr) + %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %ret = load i32* %ptr-new + ret i32 %ret +} + +define i32 @test_sor_gep_largeint([3 x i32]* %base) { +; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 21 +; CHECK-NOT: getelementptr [3 x i32]* %base-new, i32 0, i32 21 +entry: + %ptr = getelementptr [3 x i32]* %base, i32 0, i32 21 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr) + %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %ret = load i32* %ptr-new + ret i32 %ret +} + +define i32 @test_sor_noop(i32* %base) { +; CHECK: getelementptr i32* %base, i32 15 +; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) +; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6) +entry: + %ptr = getelementptr i32* %base, i32 15 + %ptr2 = getelementptr i32* %base, i32 12 + %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2) + %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5) + %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6) + %ret = load i32* %ptr-new + ret i32 %ret +} + +declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...) +declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32) +declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)