Index: include/llvm/IR/Statepoint.h
===================================================================
--- include/llvm/IR/Statepoint.h
+++ include/llvm/IR/Statepoint.h
@@ -194,12 +194,12 @@
   /// The index into the associate statepoint's argument list
   /// which contains the base pointer of the pointer whose
   /// relocation this gc.relocate describes.
-  int basePtrIndex() {
+  unsigned basePtrIndex() {
     return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
   }
   /// The index into the associate statepoint's argument list which
   /// contains the pointer whose relocation this gc.relocate describes.
-  int derivedPtrIndex() {
+  unsigned derivedPtrIndex() {
     return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
   }
   Value *basePtr() {
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
@@ -71,6 +72,10 @@
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
+static cl::opt<bool> DisableGCOpts(
+  "disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+  cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
 static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
@@ -164,6 +169,7 @@
     bool EliminateMostlyEmptyBlocks(Function &F);
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
     void EliminateMostlyEmptyBlock(BasicBlock *BB);
+    bool SimplifyOffsetableRelocate(Instruction &I);
     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
@@ -297,6 +303,18 @@
     EverMadeChange |= MadeChange;
   }
 
+  if (!DisableGCOpts) {
+    bool MadeChange = false;
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB)
+        if (isStatepoint(I))
+          MadeChange |= SimplifyOffsetableRelocate(I);
+
+    if (MadeChange)
+      ModifiedDT = true;
+    EverMadeChange |= MadeChange;
+  }
+
   if (ModifiedDT && DT)
     DT->recalculate(F);
 
@@ -520,6 +538,136 @@
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gc.relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+//
+// I is the statepoint whose relocates are examined. A derived relocate is
+// only rewritten when the relocate of its base pointer is also present, lives
+// in the same basic block, and the derived pointer is a gep directly off that
+// base. Returns true if any relocate was replaced.
+bool CodeGenPrepare::SimplifyOffsetableRelocate(Instruction &I) {
+  // Collect all the relocate calls associated with this statepoint.
+  SmallVector<IntrinsicInst *, 2> AllRelocateCalls;
+  for (User *U : I.users())
+    if (auto *II = dyn_cast<IntrinsicInst>(U))
+      if (isGCRelocate(II))
+        AllRelocateCalls.push_back(II);
+
+  // We need at least one base pointer relocation + one derived pointer
+  // relocation to mangle.
+  if (AllRelocateCalls.size() < 2)
+    return false;
+
+  // Index every relocate by its (base index, derived index) pair so that the
+  // relocate of a base pointer can be found from any of its derived relocates.
+  // insert() keeps the first relocate seen for a pair; duplicates are left
+  // untouched. TODO: coalesce duplicates.
+  DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateMap;
+  for (IntrinsicInst *Relocate : AllRelocateCalls) {
+    GCRelocateOperands Ops(Relocate);
+    auto Key = std::make_pair(Ops.basePtrIndex(), Ops.derivedPtrIndex());
+    RelocateMap.insert(std::make_pair(Key, Relocate));
+  }
+
+  // Group the derived relocates under the relocate of their base pointer.
+  DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateMasterMap;
+  for (auto &Item : RelocateMap) {
+    std::pair<unsigned, unsigned> Key = Item.first;
+    if (Key.first == Key.second)
+      // Base relocation: nothing to insert
+      continue;
+    // Use lookup() rather than operator[]: operator[] would insert a
+    // default entry for a missing key, which may rehash RelocateMap and
+    // invalidate the iterators of the loop we are in.
+    IntrinsicInst *Base =
+        RelocateMap.lookup(std::make_pair(Key.first, Key.first));
+    if (!Base)
+      // If we don't have a handle on the relocated base object, how can we gep
+      // to get the relocated derived object? We could probably do some
+      // computation of offsets between derived objects, but that's out of scope
+      // for now.
+      continue;
+    RelocateMasterMap[Base].push_back(Item.second);
+  }
+
+  bool MadeChange = false;
+  for (auto &Item : RelocateMasterMap) {
+    IntrinsicInst *GEPBase = Item.first;
+    BasicBlock *BB = GEPBase->getParent();
+
+    // Keep only the derived relocates that can be re-expressed as a gep off
+    // the relocated base.
+    SmallVector<IntrinsicInst *, 2> Candidates;
+    for (IntrinsicInst *Target : Item.second) {
+      GCRelocateOperands ThisRelocate(Target);
+      Value *Base = ThisRelocate.basePtr();
+      auto *Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+      if (!Derived || Derived->getPointerOperand() != Base)
+        continue;
+      // Instruction order is only compared within GEPBase's block below, so
+      // skip relocates that live in a different block.
+      if (Target->getParent() != BB)
+        continue;
+      // replaceAllUsesWith requires identical types, so bail out if either
+      // relocate changes the pointer type.
+      if (GEPBase->getType() != Base->getType() ||
+          Target->getType() != Derived->getType())
+        continue;
+      Candidates.push_back(Target);
+    }
+    if (Candidates.empty())
+      continue;
+
+    // Each new gep takes the place of the relocate it replaces, so GEPBase
+    // must be defined before the first candidate. Both only depend on the
+    // statepoint token, so hoisting GEPBase above that candidate is safe.
+    for (Instruction &Inst : *BB) {
+      if (&Inst == GEPBase)
+        break;
+      bool IsCandidate = false;
+      for (IntrinsicInst *Candidate : Candidates)
+        IsCandidate |= (Candidate == &Inst);
+      if (IsCandidate) {
+        GEPBase->moveBefore(&Inst);
+        break;
+      }
+    }
+
+    for (IntrinsicInst *Target : Candidates) {
+      GCRelocateOperands ThisRelocate(Target);
+      auto *Derived = cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+      SmallVector<Value *, 2> OffsetV(Derived->idx_begin(), Derived->idx_end());
+      // Create a Builder and replace the target callsite with a gep
+      IRBuilder<> Builder(Target);
+      Builder.SetCurrentDebugLocation(Target->getDebugLoc());
+      // Only claim inbounds if the original gep did.
+      Value *Repl = Derived->isInBounds()
+                        ? Builder.CreateInBoundsGEP(GEPBase, OffsetV)
+                        : Builder.CreateGEP(GEPBase, OffsetV);
+      Repl->takeName(Target);
+      Target->replaceAllUsesWith(Repl);
+      Target->eraseFromParent();
+      MadeChange = true;
+    }
+  }
+  return MadeChange;
+}
+
 /// SinkCast - Sink the specified cast instruction into its user blocks
 static bool SinkCast(CastInst *CI) {
   BasicBlock *DefBB = CI->getParent();
Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -0,0 +1,63 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+define i32 @test_sor_basic(i32* %base) {
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: getelementptr inbounds i32* %base-new, i32 15
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+define i32 @test_sor_two_derived(i32* %base) {
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: getelementptr inbounds i32* %base, i32 12
+; CHECK: getelementptr inbounds i32* %base-new, i32 15
+; CHECK: getelementptr inbounds i32* %base-new, i32 12
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %ptr2 = getelementptr inbounds i32* %base, i32 12
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+define i32 @test_sor_ooo(i32* %base) {
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: getelementptr inbounds i32* %base-new, i32 15
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+define i32 @test_sor_noop(i32* %base) {
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %ptr2 = getelementptr inbounds i32* %base, i32 12
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)