Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -71,6 +71,10 @@
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
+static cl::opt<bool> DisableGCOpts(
+  "disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+  cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
 static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
@@ -164,6 +168,7 @@
     bool EliminateMostlyEmptyBlocks(Function &F);
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
     void EliminateMostlyEmptyBlock(BasicBlock *BB);
+    bool SimplifyOffsetableRelocate(BasicBlock &BB);
     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
@@ -297,6 +302,12 @@
     EverMadeChange |= MadeChange;
   }
 
+  if (!DisableGCOpts) {
+    // Should this work across basic blocks?
+    for (BasicBlock &BB : F)
+      EverMadeChange |= SimplifyOffsetableRelocate(BB);
+  }
+
   if (ModifiedDT && DT)
     DT->recalculate(F);
 
@@ -519,6 +530,112 @@
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
+/// SimplifyOffsetableRelocate - Replace gc.relocate calls of derived pointers
+/// with an equivalent GEP off the already-relocated base pointer.
+///
+/// Turns this:
+///
+///   %base = ...
+///   %ptr = gep %base + 15
+///   %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+///   %base' = gc.relocate(%tok, i32 4, i32 4)
+///   %ptr' = gc.relocate(%tok, i32 4, i32 5)
+///   %val = load %ptr'
+///
+/// into this:
+///
+///   %base = ...
+///   %ptr = gep %base + 15
+///   %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+///   %base' = gc.relocate(%tok, i32 4, i32 4)
+///   %ptr' = gep %base' + 15
+///   %val = load %ptr'
+///
+/// Only relocates inside \p BB are considered, and only GEPs with a single
+/// constant index are rewritten. Returns true if any use was rewritten.
+bool CodeGenPrepare::SimplifyOffsetableRelocate(BasicBlock &BB) {
+  SmallVector<IntrinsicInst *, 2> RelocateCalls;
+  for (Instruction &I : BB)
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+      if (II->getIntrinsicID() == Intrinsic::experimental_gc_relocate)
+        RelocateCalls.push_back(II);
+  if (RelocateCalls.size() < 2)
+    return false;
+
+  // Assume that the first relocate in the basic block is relocating the base
+  // object. TODO: what can be done to get rid of this assumption?
+  IntrinsicInst *MasterRelocate = RelocateCalls[0];
+  RelocateCalls.erase(RelocateCalls.begin());
+
+  // gc.relocate takes (statepoint token, base index, derived index). Bail
+  // out instead of dereferencing a null cast result on unexpected operands.
+  IntrinsicInst *MasterStatepoint =
+      dyn_cast<IntrinsicInst>(MasterRelocate->getArgOperand(0));
+  ConstantInt *MasterBaseIdx =
+      dyn_cast<ConstantInt>(MasterRelocate->getArgOperand(1));
+  ConstantInt *MasterDerivedIdx =
+      dyn_cast<ConstantInt>(MasterRelocate->getArgOperand(2));
+  if (!MasterStatepoint || !MasterBaseIdx || !MasterDerivedIdx)
+    return false;
+
+  // The master call is not relocating the base object: if we don't have a
+  // handle on the relocated base object, how can we gep to get the relocated
+  // derived object? We could probably do some computation of offsets between
+  // derived objects, but that's out of scope for now.
+  const uint64_t BaseIdx = MasterBaseIdx->getZExtValue();
+  if (BaseIdx != MasterDerivedIdx->getZExtValue() ||
+      BaseIdx >= MasterStatepoint->getNumArgOperands())
+    return false;
+  Value *Base = MasterStatepoint->getArgOperand(BaseIdx);
+
+  // The replacement GEP is based on the relocated value, so it must have the
+  // same type as the original base for the GEP to mean the same thing.
+  if (MasterRelocate->getType() != Base->getType())
+    return false;
+
+  bool MadeChange = false;
+  for (IntrinsicInst *II : RelocateCalls) {
+    ConstantInt *IIBaseIdx = dyn_cast<ConstantInt>(II->getArgOperand(1));
+    ConstantInt *IIDerivedIdx = dyn_cast<ConstantInt>(II->getArgOperand(2));
+    // Either this relocate call is for some other statepoint, or it's not
+    // relocating a derived object with the original base object.
+    if (II->getArgOperand(0) != MasterStatepoint || !IIBaseIdx ||
+        !IIDerivedIdx || IIBaseIdx->getZExtValue() != BaseIdx)
+      continue;
+
+    const uint64_t DerivedIdx = IIDerivedIdx->getZExtValue();
+    // Skip duplicate relocates of the base and malformed operand indices.
+    if (DerivedIdx == BaseIdx ||
+        DerivedIdx >= MasterStatepoint->getNumArgOperands())
+      continue;
+
+    // Only handle "gep Base, <constant>"; a GEP with more indices cannot be
+    // described by its first index alone.
+    GetElementPtrInst *Derived = dyn_cast<GetElementPtrInst>(
+        MasterStatepoint->getArgOperand(DerivedIdx));
+    if (!Derived || Derived->getPointerOperand() != Base ||
+        Derived->getNumIndices() != 1 ||
+        !isa<ConstantInt>(Derived->getOperand(1)) ||
+        Derived->getType() != II->getType())
+      continue;
+
+    // Recreate the GEP off the relocated base right before the relocate,
+    // reusing the original index so its width and inbounds flag are kept.
+    IRBuilder<> Builder(II->getContext());
+    Builder.SetInsertPoint(II);
+    Builder.SetCurrentDebugLocation(II->getDebugLoc());
+    Value *Idx = Derived->getOperand(1);
+    Value *Repl = Derived->isInBounds()
+                      ? Builder.CreateInBoundsGEP(MasterRelocate, Idx)
+                      : Builder.CreateGEP(MasterRelocate, Idx);
+
+    // replaceUsesOfWith() would only rewrite II's own operands; the users of
+    // II are what must be redirected to the new GEP.
+    II->replaceAllUsesWith(Repl);
+    MadeChange = true;
+  }
+  return MadeChange;
+}
 
 /// SinkCast - Sink the specified cast instruction into its user blocks
 static bool SinkCast(CastInst *CI) {
Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -0,0 +1,22 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+define i32 @test_offsetable_relocate(i32* %base) {
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: %[[PTR_NEW:.+]] = getelementptr inbounds i32* %base-new, i32 15
+; CHECK: load i32* %[[PTR_NEW]]
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)