Index: llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp @@ -880,6 +880,71 @@ return newFunction; } +/// Scan the extraction region for lifetime markers which reference inputs. +/// Erase these markers. Return the inputs which were referenced. +/// +/// The extraction region is defined by a set of blocks (\p Blocks), and a set +/// of allocas which will be moved from the caller function into the extracted +/// function (\p SunkAllocas). +static SetVector +eraseLifetimeMarkersOnInputs(const SetVector &Blocks, + const SetVector &SunkAllocas) { + SetVector InputObjectsWithLifetime; + for (BasicBlock *BB : Blocks) { + for (auto It = BB->begin(), End = BB->end(); It != End;) { + auto *II = dyn_cast(&*It); + ++It; + if (!II || !II->isLifetimeStartOrEnd()) + continue; + + // Get the memory operand of the lifetime marker. If the underlying + // object is a sunk alloca, or is otherwise defined in the extraction + // region, the lifetime marker must not be erased. + Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); + if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) + continue; + + InputObjectsWithLifetime.insert(Mem); + II->eraseFromParent(); + } + } + return InputObjectsWithLifetime; +} + +/// Insert lifetime start/end markers surrounding the call to the new function +/// for objects defined in the caller. +static void insertLifetimeMarkersSurroundingCall(Module *M, + ArrayRef Objects, + CallInst *TheCall) { + if (Objects.empty()) + return; + + LLVMContext &Ctx = M->getContext(); + auto Int8PtrTy = Type::getInt8PtrTy(Ctx); + auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); + auto StartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + auto EndFn = llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::lifetime_end, + Int8PtrTy); + Instruction *Term = TheCall->getParent()->getTerminator(); + for (Value *Mem : Objects) { + assert((!isa(Mem) || + cast(Mem)->getFunction() == TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *MemAsI8Ptr = nullptr; + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + + auto StartMarker = CallInst::Create(StartFn, {NegativeOne, MemAsI8Ptr}); + StartMarker->insertBefore(TheCall); + auto EndMarker = CallInst::Create(EndFn, {NegativeOne, MemAsI8Ptr}); + EndMarker->insertBefore(Term); + } +} + /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. @@ -1119,6 +1184,10 @@ break; } + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call); + return call; } @@ -1181,71 +1250,6 @@ MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } -/// Scan the extraction region for lifetime markers which reference inputs. -/// Erase these markers. Return the inputs which were referenced. -/// -/// The extraction region is defined by a set of blocks (\p Blocks), and a set -/// of allocas which will be moved from the caller function into the extracted -/// function (\p SunkAllocas). -static SetVector -eraseLifetimeMarkersOnInputs(const SetVector &Blocks, - const SetVector &SunkAllocas) { - SetVector InputObjectsWithLifetime; - for (BasicBlock *BB : Blocks) { - for (auto It = BB->begin(), End = BB->end(); It != End;) { - auto *II = dyn_cast(&*It); - ++It; - if (!II || !II->isLifetimeStartOrEnd()) - continue; - - // Get the memory operand of the lifetime marker. If the underlying - // object is a sunk alloca, or is otherwise defined in the extraction - // region, the lifetime marker must not be erased. - Value *Mem = II->getOperand(1)->stripInBoundsOffsets(); - if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) - continue; - - InputObjectsWithLifetime.insert(Mem); - II->eraseFromParent(); - } - } - return InputObjectsWithLifetime; -} - -/// Insert lifetime start/end markers surrounding the call to the new function -/// for objects defined in the caller. -static void insertLifetimeMarkersSurroundingCall( - Module *M, const SetVector &InputObjectsWithLifetime, - CallInst *TheCall) { - if (InputObjectsWithLifetime.empty()) - return; - - LLVMContext &Ctx = M->getContext(); - auto Int8PtrTy = Type::getInt8PtrTy(Ctx); - auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); - auto LifetimeStartFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_start, Int8PtrTy); - auto LifetimeEndFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_end, Int8PtrTy); - for (Value *Mem : InputObjectsWithLifetime) { - assert((!isa(Mem) || - cast(Mem)->getFunction() == TheCall->getFunction()) && - "Input memory not defined in original function"); - Value *MemAsI8Ptr = nullptr; - if (Mem->getType() == Int8PtrTy) - MemAsI8Ptr = Mem; - else - MemAsI8Ptr = - CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); - - auto StartMarker = - CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr}); - StartMarker->insertBefore(TheCall); - auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr}); - EndMarker->insertAfter(TheCall); - } -} - Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; @@ -1389,7 +1393,8 @@ // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), - InputObjectsWithLifetime, TheCall); + InputObjectsWithLifetime.getArrayRef(), + TheCall); // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineDebug.ll =================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineDebug.ll +++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineDebug.ll @@ -23,7 +23,8 @@ ; CHECK-LABEL: @caller ; CHECK: codeRepl.i: -; CHECK-NEXT: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]] +; CHECK-NOT: br label +; CHECK: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]] define i32 @caller(i32 %v) !dbg !8 { entry: %call = call i32 @callee(i32 %v), !dbg !14 @@ -53,7 +54,8 @@ ; CHECK-LABEL: @caller2 ; CHECK: codeRepl.i: -; CHECK-NEXT: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]] +; CHECK-NOT: br label +; CHECK: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]] define i32 @caller2(i32 %v) !dbg !21 { entry: %call = call i32 @callee2(i32 %v), !dbg !22 Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll =================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll +++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineInvokeProducesOutVal.ll @@ -26,7 +26,11 @@ ; CHECK-LABEL: bb: ; CHECK-NEXT: [[CALL26LOC:%.*]] = alloca i8* ; CHECK-LABEL: codeRepl.i: +; CHECK-NEXT: %lt.cast.i = bitcast i8** [[CALL26LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast.i) ; CHECK-NEXT: call void @bar.1.bb1(i8** [[CALL26LOC]]) +; CHECK-NEXT: %call26.reload.i = load i8*, i8** [[CALL26LOC]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast.i) define i8* @dummy_caller(i32 %arg) { bb: %tmp = tail call i8* @bar(i32 %arg) Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineVarArgsDebug.ll =================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineVarArgsDebug.ll +++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineVarArgsDebug.ll @@ -19,7 +19,8 @@ ; CHECK-LABEL: @caller ; CHECK: codeRepl.i: -; CHECK-NEXT: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]] +; CHECK-NOT: br label +; CHECK: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]] define i32 @caller(i32 %v) !dbg !8 { entry: %call = call i32 (i32, ...) @callee(i32 %v, i32 99), !dbg !14 Index: llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll +++ llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll @@ -15,7 +15,7 @@ ; CHECK: call {{.*}}@sideeffect( ; CHECK: call {{.*}}@realloc( ; CHECK-LABEL: codeRepl: -; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc) +; CHECK: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc) ; CHECK-LABEL: cleanup: ; CHECK-NEXT: phi i8* [ null, %if.then ], [ %call, %if.end ], [ %retval.0.ce.reload, %codeRepl ] define i8* @realloc2(i8* %ptr, i64 %size) { Index: llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll +++ llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll @@ -30,13 +30,13 @@ ; CHECK-LABEL: codeRepl: ; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) -; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) -; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) -; CHECK: br i1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK-NEXT: br i1 outlinedPath: ; These two uses of stack slots are overlapping. This should prevent Index: llvm/trunk/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll +++ llvm/trunk/test/Transforms/HotColdSplit/split-phis-in-exit-blocks.ll @@ -12,8 +12,11 @@ ; CHECK-NEXT: ] ; ; CHECK: codeRepl: +; CHECK-NEXT: bitcast +; CHECK-NEXT: lifetime.start ; CHECK-NEXT: call void @pluto.cold.1(i1* %tmp8.ce.loc) ; CHECK-NEXT: %tmp8.ce.reload = load i1, i1* %tmp8.ce.loc +; CHECK-NEXT: lifetime.end ; CHECK-NEXT: br label %bb7 ; ; CHECK: bb7: