Index: llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp @@ -885,16 +885,17 @@ return newFunction; } -/// Scan the extraction region for lifetime markers which reference inputs. -/// Erase these markers. Return the inputs which were referenced. +/// Erase lifetime.start markers which reference inputs to the extraction +/// region, and insert the referenced memory into \p LifetimesStart. Do the same +/// with lifetime.end markers (but insert them into \p LifetimesEnd). /// /// The extraction region is defined by a set of blocks (\p Blocks), and a set /// of allocas which will be moved from the caller function into the extracted /// function (\p SunkAllocas). -static SetVector -eraseLifetimeMarkersOnInputs(const SetVector &Blocks, - const SetVector &SunkAllocas) { - SetVector InputObjectsWithLifetime; +static void eraseLifetimeMarkersOnInputs(const SetVector &Blocks, + const SetVector &SunkAllocas, + SetVector &LifetimesStart, + SetVector &LifetimesEnd) { for (BasicBlock *BB : Blocks) { for (auto It = BB->begin(), End = BB->end(); It != End;) { auto *II = dyn_cast(&*It); @@ -909,44 +910,64 @@ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem)) continue; - InputObjectsWithLifetime.insert(Mem); + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + LifetimesStart.insert(Mem); + else + LifetimesEnd.insert(Mem); II->eraseFromParent(); } } - return InputObjectsWithLifetime; } /// Insert lifetime start/end markers surrounding the call to the new function /// for objects defined in the caller. -static void insertLifetimeMarkersSurroundingCall(Module *M, - ArrayRef Objects, - CallInst *TheCall) { - if (Objects.empty()) - return; - +static void insertLifetimeMarkersSurroundingCall( + Module *M, ArrayRef LifetimesStart, ArrayRef LifetimesEnd, + CallInst *TheCall) { LLVMContext &Ctx = M->getContext(); auto Int8PtrTy = Type::getInt8PtrTy(Ctx); auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1); - auto StartFn = llvm::Intrinsic::getDeclaration( - M, llvm::Intrinsic::lifetime_start, Int8PtrTy); - auto EndFn = llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::lifetime_end, - Int8PtrTy); Instruction *Term = TheCall->getParent()->getTerminator(); - for (Value *Mem : Objects) { - assert((!isa(Mem) || - cast(Mem)->getFunction() == TheCall->getFunction()) && - "Input memory not defined in original function"); - Value *MemAsI8Ptr = nullptr; - if (Mem->getType() == Int8PtrTy) - MemAsI8Ptr = Mem; - else - MemAsI8Ptr = - CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); - auto StartMarker = CallInst::Create(StartFn, {NegativeOne, MemAsI8Ptr}); - StartMarker->insertBefore(TheCall); - auto EndMarker = CallInst::Create(EndFn, {NegativeOne, MemAsI8Ptr}); - EndMarker->insertBefore(Term); + // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts + // needed to satisfy this requirement so they may be reused. + DenseMap Bitcasts; + + // Emit lifetime markers for the pointers given in \p Objects. Insert the + // markers before the call if \p InsertBefore, and after the call otherwise. + auto insertMarkers = [&](Function *MarkerFunc, ArrayRef Objects, + bool InsertBefore) { + for (Value *Mem : Objects) { + assert((!isa(Mem) || cast(Mem)->getFunction() == + TheCall->getFunction()) && + "Input memory not defined in original function"); + Value *&MemAsI8Ptr = Bitcasts[Mem]; + if (!MemAsI8Ptr) { + if (Mem->getType() == Int8PtrTy) + MemAsI8Ptr = Mem; + else + MemAsI8Ptr = + CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall); + } + + auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr}); + if (InsertBefore) + Marker->insertBefore(TheCall); + else + Marker->insertBefore(Term); + } + }; + + if (!LifetimesStart.empty()) { + auto StartFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_start, Int8PtrTy); + insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true); + } + + if (!LifetimesEnd.empty()) { + auto EndFn = llvm::Intrinsic::getDeclaration( + M, llvm::Intrinsic::lifetime_end, Int8PtrTy); + insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false); } } @@ -1214,7 +1235,7 @@ // Insert lifetime markers around the reloads of any output values. The // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call); + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); return call; } @@ -1403,8 +1424,9 @@ // referenced by lifetime start/end markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack // coloring pass from merging slots which store input objects. - ValueSet InputObjectsWithLifetime = - eraseLifetimeMarkersOnInputs(Blocks, SinkingCands); + ValueSet LifetimesStart, LifetimesEnd; + eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart, + LifetimesEnd); // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = @@ -1428,8 +1450,8 @@ // Replicate the effects of any lifetime start/end markers which referenced // input objects in the extraction region by placing markers around the call. insertLifetimeMarkersSurroundingCall(oldFunction->getParent(), - InputObjectsWithLifetime.getArrayRef(), - TheCall); + LifetimesStart.getArrayRef(), + LifetimesEnd.getArrayRef(), TheCall); // Propagate personality info to the new function if there is one. if (oldFunction->hasPersonalityFn()) Index: llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll +++ llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-1.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @use(i8*) + +declare void @cold_use2(i8*, i8*) cold + +; CHECK-LABEL: define {{.*}}@foo( +define void @foo() { +entry: + %local1 = alloca i256 + %local2 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + %local2_cast = bitcast i256* %local2 to i8* + br i1 undef, label %normalPath, label %outlinedPath + +normalPath: + ; These two uses of stack slots are non-overlapping. Based on this alone, + ; the stack slots could be merged. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + call void @use(i8* %local1_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +; CHECK-LABEL: codeRepl: +; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) +; CHECK-NEXT: br i1 + +outlinedPath: + ; These two uses of stack slots are overlapping. This should prevent + ; merging of stack slots. CodeExtractor must replicate the effects of + ; these markers in the caller to inhibit stack coloring. + %gep1 = getelementptr inbounds i8, i8* %local1_cast, i64 1 + call void @llvm.lifetime.start.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @cold_use2(i8* %local1_cast, i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %gep1) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + br i1 undef, label %outlinedPath2, label %outlinedPathExit + +outlinedPath2: + ; These extra lifetime markers are used to test that we emit only one + ; pair of guard markers in the caller per memory object. + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) + call void @use(i8* %local2_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) + ret void + +outlinedPathExit: + ret void +} + +; CHECK-LABEL: define {{.*}}@foo.cold.1( +; CHECK-NOT: @llvm.lifetime Index: llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll +++ llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs-2.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare void @cold_use(i8*) cold + +; In this CFG, splitting will extract the blocks extract{1,2}. I.e., it will +; extract a lifetime.start marker, but not the corresponding lifetime.end +; marker. Make sure that a lifetime.start marker is emitted before the call to +; the split function, and *only* that marker. +; +; entry +; / \ +; extract1 no-extract1 +; (lt.start) | +; / | +; extract2 | +; \_____ | +; \ / +; exit +; (lt.end) +; +; After splitting, we should see: +; +; entry +; / \ +; codeRepl no-extract1 +; (lt.start) | +; \ / +; exit +; (lt.end) +define void @only_lifetime_start_is_cold() { +; CHECK-LABEL: @only_lifetime_start_is_cold( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: br i1 undef, label [[CODEREPL:%.*]], label [[NO_EXTRACT1:%.*]] +; CHECK: codeRepl: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @only_lifetime_start_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[NO_EXTRACT1]], label [[EXIT:%.*]] +; CHECK: no-extract1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: ret void +; +entry: + %local1 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + br i1 undef, label %extract1, label %no-extract1 + +extract1: + ; lt.start + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + call void @cold_use(i8* %local1_cast) + br i1 undef, label %extract2, label %no-extract1 + +extract2: + br label %exit + +no-extract1: + br label %exit + +exit: + ; lt.end + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + ret void +} + +; In this CFG, splitting will extract the block extract1. I.e., it will extract +; a lifetime.end marker, but not the corresponding lifetime.start marker. Make +; sure that a lifetime.end marker is emitted after the call to the split +; function, and *only* that marker. +; +; entry +; (lt.start) +; / \ +; no-extract1 extract1 +; (lt.end) (lt.end) +; \ / +; exit +; +; After splitting, we should see: +; +; entry +; (lt.start) +; / \ +; no-extract1 codeRepl +; (lt.end) (lt.end) +; \ / +; exit +define void @only_lifetime_end_is_cold() { +; CHECK-LABEL: @only_lifetime_end_is_cold( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL1:%.*]] = alloca i256 +; CHECK-NEXT: [[LOCAL1_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: br i1 undef, label [[NO_EXTRACT1:%.*]], label [[CODEREPL:%.*]] +; CHECK: no-extract1: +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[LOCAL1_CAST]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: codeRepl: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i256* [[LOCAL1]] to i8* +; CHECK-NEXT: call void @only_lifetime_end_is_cold.cold.1(i8* [[LOCAL1_CAST]]) #3 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + ; lt.start + %local1 = alloca i256 + %local1_cast = bitcast i256* %local1 to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) + br i1 undef, label %no-extract1, label %extract1 + +no-extract1: + ; lt.end + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + br label %exit + +extract1: + ; lt.end + call void @cold_use(i8* %local1_cast) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) + br label %exit + +exit: + ret void +} Index: llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll =================================================================== --- llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll +++ llvm/trunk/test/Transforms/HotColdSplit/lifetime-markers-on-inputs.ll @@ -1,66 +0,0 @@ -; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s - -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) - -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) - -declare void @use(i8*) - -declare void @cold_use2(i8*, i8*) cold - -; CHECK-LABEL: define {{.*}}@foo( -define void @foo() { -entry: - %local1 = alloca i256 - %local2 = alloca i256 - %local1_cast = bitcast i256* %local1 to i8* - %local2_cast = bitcast i256* %local2 to i8* - br i1 undef, label %normalPath, label %outlinedPath - -normalPath: - ; These two uses of stack slots are non-overlapping. Based on this alone, - ; the stack slots could be merged. - call void @llvm.lifetime.start.p0i8(i64 1, i8* %local1_cast) - call void @use(i8* %local1_cast) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %local1_cast) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) - call void @use(i8* %local2_cast) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) - ret void - -; CHECK-LABEL: codeRepl: -; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]]) -; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]]) -; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]]) -; CHECK-NEXT: br i1 - -outlinedPath: - ; These two uses of stack slots are overlapping. This should prevent - ; merging of stack slots. CodeExtractor must replicate the effects of - ; these markers in the caller to inhibit stack coloring. - %gep1 = getelementptr inbounds i8, i8* %local1_cast, i64 1 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %gep1) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) - call void @cold_use2(i8* %local1_cast, i8* %local2_cast) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %gep1) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) - br i1 undef, label %outlinedPath2, label %outlinedPathExit - -outlinedPath2: - ; These extra lifetime markers are used to test that we emit only one - ; pair of guard markers in the caller per memory object. - call void @llvm.lifetime.start.p0i8(i64 1, i8* %local2_cast) - call void @use(i8* %local2_cast) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %local2_cast) - ret void - -outlinedPathExit: - ret void -} - -; CHECK-LABEL: define {{.*}}@foo.cold.1( -; CHECK-NOT: @llvm.lifetime