Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -880,16 +880,17 @@ return newFunction; } -/// Scan the extraction region for lifetime markers which reference inputs. -/// Erase these markers. Return the inputs which were referenced. +/// Erase lifetime.start markers which reference inputs to the extraction +/// region, and insert the referenced memory into \p LifetimesStart. Do the same +/// with lifetime.end markers (but insert them into \p LifetimesEnd). /// /// The extraction region is defined by a set of blocks (\p Blocks), and a set /// of allocas which will be moved from the caller function into the extracted /// function (\p SunkAllocas). -static SetVector -eraseLifetimeMarkersOnInputs(const SetVector &Blocks, - const SetVector &SunkAllocas) { - SetVector InputObjectsWithLifetime; +static void eraseLifetimeMarkersOnInputs(const SetVector &Blocks, + const SetVector &SunkAllocas, + SetVector &LifetimesStart, + SetVector &LifetimesEnd) { for (BasicBlock *BB : Blocks) { for (auto It = BB->begin(), End = BB->end(); It != End;) { auto *II = dyn_cast(&*It); @@ -904,44 +905,64 @@ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) continue; - InputObjectsWithLifetime.insert(Mem); + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + LifetimesStart.insert(Mem); + else + LifetimesEnd.insert(Mem); II->eraseFromParent(); } } - return InputObjectsWithLifetime; } /// Insert lifetime start/end markers surrounding the call to the new function /// for objects defined in the caller. 
-static void insertLifetimeMarkersSurroundingCall(Module *M, - ArrayRef Objects, - CallInst *TheCall) { - if (Objects.empty()) - return; - +static void insertLifetimeMarkersSurroundingCall( + Module *M, ArrayRef LifetimesStart, ArrayRef LifetimesEnd, + CallInst *TheCall) { LLVMContext &Ctx = M->getContext(); auto Int8PtrTy = Type::getInt8PtrTy(Ctx); auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); - auto StartFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_start, Int8PtrTy); - auto EndFn = llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::lifetime_end, - Int8PtrTy); Instruction *Term = TheCall->getParent()->getTerminator(); - for (Value *Mem : Objects) { - assert((!isa(Mem) || - cast(Mem)->getFunction() == TheCall->getFunction()) && - "Input memory not defined in original function"); - Value *MemAsI8Ptr = nullptr; - if (Mem->getType() == Int8PtrTy) - MemAsI8Ptr = Mem; - else - MemAsI8Ptr = - CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); - auto StartMarker = CallInst::Create(StartFn, {NegativeOne, MemAsI8Ptr}); - StartMarker->insertBefore(TheCall); - auto EndMarker = CallInst::Create(EndFn, {NegativeOne, MemAsI8Ptr}); - EndMarker->insertBefore(Term); + // The memory argument to a lifetime marker must be an i8*. Cache any bitcasts + // needed to satisfy this requirement so they may be reused. + DenseMap Bitcasts; + + // Emit lifetime markers for the pointers given in \p Objects. Insert the + // markers before the call if \p InsertBefore, and after the call otherwise. 
+ auto insertMarkers = [&](Function *MarkerFunc, ArrayRef Objects, + bool InsertBefore) { + for (Value *Mem : Objects) { + assert((!isa(Mem) || cast(Mem)->getFunction() == + TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *&MemAsI8Ptr = Bitcasts[Mem]; + if (!MemAsI8Ptr) { + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + } + + auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr}); + if (InsertBefore) + Marker->insertBefore(TheCall); + else + Marker->insertBefore(Term); + } + }; + + if (!LifetimesStart.empty()) { + auto StartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true); + } + + if (!LifetimesEnd.empty()) { + auto EndFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_end, Int8PtrTy); + insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false); } } @@ -1200,7 +1221,7 @@ // Insert lifetime markers around the reloads of any output values. The // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call); + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); return call; } @@ -1382,8 +1403,9 @@ // referenced by lifetime start/end markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack // coloring pass from merging slots which store input objects. - ValueSet InputObjectsWithLifetime = - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands); + ValueSet LifetimesStart, LifetimesEnd; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart, + LifetimesEnd); // Construct new function based on inputs/outputs & add allocas for all defs. 
Function *newFunction = @@ -1407,8 +1429,8 @@ // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), - InputObjectsWithLifetime.getArrayRef(), - TheCall); + LifetimesStart.getArrayRef(), + LifetimesEnd.getArrayRef(), TheCall); // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) Index: llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @cold_use(i8*) cold + +; In this CFG, splitting will extract the blocks extract{1,2}. I.e., it will +; extract a lifetime.start marker, but not the corresponding lifetime.end +; marker. Make sure that a lifetime.start marker is emitted before the call to +; the split function, and *only* that marker. 
+; +; entry +; / \ +; extract1 no-extract1 +; (lt.start) | +; / | +; extract2 | +; \_____ | +; \ / +; exit +; (lt.end) +; +; After splitting, we should see: +; +; entry +; / \ +; codeRepl no-extract1 +; (lt.start) | +; \ / +; exit +; (lt.end) +define void @only_lifetime_start_is_cold() { +; CHECK-LABEL: @only_lifetime_start_is_cold( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: br i1 undef, label [[CODEREPL:%.*]], label [[NO_EXTRACT1:%.*]] +; CHECK: codeRepl: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @only_lifetime_start_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[NO_EXTRACT1]], label [[EXIT:%.*]] +; CHECK: no-extract1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: ret void +; +entry: + %local1 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + br i1 undef, label %extract1, label %no-extract1 + +extract1: + ; lt.start + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + call void @cold_use(i8* %local1_cast) + br i1 undef, label %extract2, label %no-extract1 + +extract2: + br label %exit + +no-extract1: + br label %exit + +exit: + ; lt.end + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + ret void +} + +; In this CFG, splitting will extract the block extract1. I.e., it will extract +; a lifetime.end marker, but not the corresponding lifetime.start marker. Make +; sure that a lifetime.end marker is emitted after the call to the split +; function, and *only* that marker. 
+; +; entry +; (lt.start) +; / \ +; no-extract1 extract1 +; (lt.end) (lt.end) +; \ / +; exit +; +; After splitting, we should see: +; +; entry +; (lt.start) +; / \ +; no-extract1 codeRepl +; (lt.end) (lt.end) +; \ / +; exit +define void @only_lifetime_end_is_cold() { +; CHECK-LABEL: @only_lifetime_end_is_cold( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: br i1 undef, label [[NO_EXTRACT1:%.*]], label [[CODEREPL:%.*]] +; CHECK: no-extract1: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: codeRepl: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @only_lifetime_end_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + ; lt.start + %local1 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + br i1 undef, label %no-extract1, label %extract1 + +no-extract1: + ; lt.end + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + br label %exit + +extract1: + ; lt.end + call void @cold_use(i8* %local1_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + br label %exit + +exit: + ret void +}