Index: include/llvm/IR/Statepoint.h
===================================================================
--- include/llvm/IR/Statepoint.h
+++ include/llvm/IR/Statepoint.h
@@ -68,6 +68,11 @@
   ValueTy *actualCallee() {
     return StatepointCS.getArgument(0);
   }
+  /// Return raw statepoint argument \p N (argument 0 is the actual callee).
+  ValueTy *getArgOperand(int N) {
+    return StatepointCS.getArgument(N);
+  }
+
   /// Number of arguments to be passed to the actual callee.
   int numCallArgs() {
     return cast<ConstantInt>(StatepointCS.getArgument(1))->getZExtValue();
@@ -194,12 +199,12 @@
   /// The index into the associate statepoint's argument list
   /// which contains the base pointer of the pointer whose
   /// relocation this gc.relocate describes.
-  int basePtrIndex() {
+  unsigned basePtrIndex() {
     return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
   }
   /// The index into the associate statepoint's argument list which
   /// contains the pointer whose relocation this gc.relocate describes.
-  int derivedPtrIndex() {
+  unsigned derivedPtrIndex() {
     return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
   }
   Value *basePtr() {
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
@@ -71,6 +72,10 @@
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
+static cl::opt<bool> DisableGCOpts(
+  "disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+  cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
 static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
@@ -164,6 +169,7 @@
     bool EliminateMostlyEmptyBlocks(Function &F);
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
     void EliminateMostlyEmptyBlock(BasicBlock *BB);
+    bool SimplifyOffsetableRelocate(Instruction &I);
     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
@@ -297,6 +303,13 @@
     EverMadeChange |= MadeChange;
   }
 
+  if (!DisableGCOpts) {
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB)
+        if (isStatepoint(I))
+          EverMadeChange |= SimplifyOffsetableRelocate(I);
+  }
+
   if (ModifiedDT && DT)
     DT->recalculate(F);
 
@@ -519,6 +532,120 @@
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
+/// Returns true if \p First appears strictly before \p Second in the basic
+/// block that contains both; false if they live in different blocks.
+static bool precedesInSameBlock(const Instruction *First,
+                                const Instruction *Second) {
+  if (First == Second || First->getParent() != Second->getParent())
+    return false;
+  for (const Instruction &Inst : *First->getParent()) {
+    if (&Inst == First)
+      return true;
+    if (&Inst == Second)
+      return false;
+  }
+  return false;
+}
+
+// Rewrites gc.relocate calls of derived pointers as geps off the relocated
+// base pointer. I is the statepoint whose relocates are examined; the return
+// value is true if at least one relocate call was replaced.
+//
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::SimplifyOffsetableRelocate(Instruction &I) {
+  // Collect all the relocate calls associated with this statepoint.
+  SmallVector<IntrinsicInst *, 2> RelocateCalls;
+  for (User *U : I.users())
+    if (auto *UI = dyn_cast<Instruction>(U))
+      if (isGCRelocate(UI))
+        RelocateCalls.push_back(cast<IntrinsicInst>(UI));
+
+  // We need at least one base pointer relocation + one derived pointer
+  // relocation to mangle.
+  if (RelocateCalls.size() < 2)
+    return false;
+
+  // TODO: we collect only one base relocate.
+  IntrinsicInst *Target = nullptr;
+  for (IntrinsicInst *Call : RelocateCalls) {
+    GCRelocateOperands ThisRelocate(Call);
+    if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+      Target = Call;
+      break;
+    }
+  }
+
+  // No call is relocating the base object: if we don't have a handle on the
+  // relocated base object, how can we gep to get the relocated derived
+  // object? We could probably do some computation of offsets between
+  // derived objects, but that's out of scope for now.
+  if (!Target)
+    return false;
+
+  GCRelocateOperands MasterRelocate(Target);
+  bool MadeChange = false;
+  for (IntrinsicInst *Source : RelocateCalls) {
+    GCRelocateOperands ThisRelocate(Source);
+    if (ThisRelocate.basePtrIndex() != MasterRelocate.basePtrIndex() ||
+        ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+      // Not relocating a derived object with the original base object OR a
+      // duplicate relocate call. TODO: coalesce duplicates.
+      continue;
+    }
+
+    // The replacement gep is inserted at Source and uses Target, so Target
+    // must already be defined there. TODO: hoist the base relocate instead.
+    if (!precedesInSameBlock(Target, Source))
+      continue;
+
+    Value *Base = ThisRelocate.basePtr();
+    auto *Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+    if (!Derived || Derived->getPointerOperand() != Base ||
+        Derived->getNumIndices() != 1)
+      continue;
+
+    // Only a constant scalar offset is handled; anything else is skipped
+    // rather than dereferencing a null dyn_cast result.
+    // TODO: what if the offset is a vector?
+    auto *Offset = dyn_cast<ConstantInt>(Derived->getOperand(1));
+    if (!Offset)
+      continue;
+
+    // The rebuilt gep must have exactly the type the relocate produced.
+    if (Target->getType() != Base->getType() ||
+        Source->getType() != Derived->getType())
+      continue;
+
+    // Replace the derived relocate with a gep off the relocated base,
+    // reusing the original index constant and inbounds flag.
+    IRBuilder<> Builder(Source);
+    Builder.SetCurrentDebugLocation(Source->getDebugLoc());
+    Value *Repl = Derived->isInBounds()
+                      ? Builder.CreateInBoundsGEP(Target, Offset)
+                      : Builder.CreateGEP(Target, Offset);
+    Repl->takeName(Source);
+    Source->replaceAllUsesWith(Repl);
+    Source->eraseFromParent();
+    MadeChange = true;
+  }
+  return MadeChange;
+}
 
 /// SinkCast - Sink the specified cast instruction into its user blocks
 static bool SinkCast(CastInst *CI) {
Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -0,0 +1,36 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+; A constant-offset derived pointer is rebuilt as a gep off the relocated base.
+define i32 @test_offsetable_relocate(i32* %base) {
+; CHECK-LABEL: @test_offsetable_relocate
+; CHECK: getelementptr inbounds i32* %base, i32 15
+; CHECK: getelementptr inbounds i32* %base-new, i32 15
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 15
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+; A variable-offset derived pointer is left alone (and must not crash).
+define i32 @test_variable_offset(i32* %base, i32 %idx) {
+; CHECK-LABEL: @test_variable_offset
+; CHECK: %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+entry:
+  %ptr = getelementptr inbounds i32* %base, i32 %idx
+  %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
+  %ret = load i32* %ptr-new
+  ret i32 %ret
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)