Index: llvm/include/llvm/Transforms/Utils/CodeExtractor.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -151,6 +151,20 @@
     BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
 
   private:
+    struct LifetimeMarkerInfo {
+      bool SinkLifeStart = false;
+      bool HoistLifeEnd = false;
+      Instruction *LifeStart = nullptr;
+      Instruction *LifeEnd = nullptr;
+    };
+
+    // Find the pair of lifetime markers for address 'Addr' that are either
+    // defined inside the outline region or can legally be shrinkwrapped into
+    // the outline region. If there are no other untracked uses of the address,
+    // return the markers if both are found; otherwise return null markers.
+    LifetimeMarkerInfo getLifeTimeMarkers(Instruction *Addr,
+                                          BasicBlock *ExitBlock) const;
+
     void severSplitPHINodesOfEntry(BasicBlock *&Header);
     void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
     void splitReturnBlocks();
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -410,11 +410,70 @@
   return CommonExitBlock;
 }
 
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifeTimeMarkers(Instruction *Addr,
+                                  BasicBlock *ExitBlock) const {
+  LifetimeMarkerInfo Info;
+
+  for (User *U : Addr->users()) {
+    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+    if (IntrInst) {
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // Do not handle the case where Addr has multiple start markers.
+        if (Info.LifeStart)
+          return {};
+        Info.LifeStart = IntrInst;
+      }
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+        if (Info.LifeEnd)
+          return {};
+        Info.LifeEnd = IntrInst;
+      }
+      continue;
+    }
+    // Find untracked uses of the address, bail.
+    if (!definedInRegion(Blocks, U))
+      return {};
+  }
+
+  if (!Info.LifeStart || !Info.LifeEnd)
+    return {};
+
+  Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+  Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+  // Do legality check.
+  if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+      !isLegalToShrinkwrapLifetimeMarkers(Addr))
+    return {};
+
+  // Check to see if we have a place to do hoisting, if not, bail.
+  if (Info.HoistLifeEnd && !ExitBlock)
+    return {};
+
+  return Info;
+}
+
 void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
                                 BasicBlock *&ExitBlock) const {
   Function *Func = (*Blocks.begin())->getParent();
   ExitBlock = getCommonExitBlock(Blocks);
 
+  auto moveOrIgnoreLifetimeMarkers =
+      [&](const LifetimeMarkerInfo &LMI) -> bool {
+    if (!LMI.LifeStart)
+      return false;
+    if (LMI.SinkLifeStart) {
+      LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+                        << "\n");
+      SinkCands.insert(LMI.LifeStart);
+    }
+    if (LMI.HoistLifeEnd) {
+      LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+      HoistCands.insert(LMI.LifeEnd);
+    }
+    return true;
+  };
+
   for (BasicBlock &BB : *Func) {
     if (Blocks.count(&BB))
       continue;
@@ -423,95 +482,52 @@
       if (!AI)
         continue;
 
-      // Find the pair of life time markers for address 'Addr' that are either
-      // defined inside the outline region or can legally be shrinkwrapped into
-      // the outline region. If there are not other untracked uses of the
-      // address, return the pair of markers if found; otherwise return a pair
-      // of nullptr.
-      auto GetLifeTimeMarkers =
-          [&](Instruction *Addr, bool &SinkLifeStart,
-              bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
-        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
-
-        for (User *U : Addr->users()) {
-          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
-          if (IntrInst) {
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
-              // Do not handle the case where AI has multiple start markers.
-              if (LifeStart)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeStart = IntrInst;
-            }
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
-              if (LifeEnd)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeEnd = IntrInst;
-            }
-            continue;
-          }
-          // Find untracked uses of the address, bail.
-          if (!definedInRegion(Blocks, U))
-            return std::make_pair<Instruction *>(nullptr, nullptr);
-        }
-
-        if (!LifeStart || !LifeEnd)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        SinkLifeStart = !definedInRegion(Blocks, LifeStart);
-        HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
-        // Do legality Check.
-        if ((SinkLifeStart || HoistLifeEnd) &&
-            !isLegalToShrinkwrapLifetimeMarkers(Addr))
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        // Check to see if we have a place to do hoisting, if not, bail.
-        if (HoistLifeEnd && !ExitBlock)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        return std::make_pair(LifeStart, LifeEnd);
-      };
-
-      bool SinkLifeStart = false, HoistLifeEnd = false;
-      auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
-
-      if (Markers.first) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
+      LifetimeMarkerInfo MarkerInfo = getLifeTimeMarkers(AI, ExitBlock);
+      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+      if (Moved) {
+        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
         SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
         continue;
       }
 
-      // Follow the bitcast.
-      Instruction *MarkerAddr = nullptr;
+      // Follow any bitcasts.
+      SmallVector<Instruction *, 2> Bitcasts;
+      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
       for (User *U : AI->users()) {
         if (U->stripInBoundsConstantOffsets() == AI) {
-          SinkLifeStart = false;
-          HoistLifeEnd = false;
           Instruction *Bitcast = cast<Instruction>(U);
-          Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
-          if (Markers.first) {
-            MarkerAddr = Bitcast;
+          LifetimeMarkerInfo LMI = getLifeTimeMarkers(Bitcast, ExitBlock);
+          if (LMI.LifeStart) {
+            Bitcasts.push_back(Bitcast);
+            BitcastLifetimeInfo.push_back(LMI);
             continue;
           }
         }
 
         // Found unknown use of AI.
         if (!definedInRegion(Blocks, U)) {
-          MarkerAddr = nullptr;
+          Bitcasts.clear();
           break;
         }
       }
 
-      if (MarkerAddr) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
-        if (!definedInRegion(Blocks, MarkerAddr))
-          SinkCands.insert(MarkerAddr);
-        SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
+      // Either no bitcasts reference the alloca or there are unknown uses.
+      if (Bitcasts.empty())
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+      SinkCands.insert(AI);
+      for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+        Instruction *BitcastAddr = Bitcasts[I];
+        const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+        assert(LMI.LifeStart &&
+               "Unsafe to sink bitcast without lifetime markers");
+        moveOrIgnoreLifetimeMarkers(LMI);
+        if (!definedInRegion(Blocks, BitcastAddr)) {
+          LLVM_DEBUG(dbgs()
+                     << "Sinking bitcast-of-alloca: " << *BitcastAddr << "\n");
+          SinkCands.insert(BitcastAddr);
+        }
       }
     }
   }
@@ -1337,6 +1353,7 @@
         return nullptr;
     }
   }
+
   ValueSet inputs, outputs, SinkingCands, HoistingCands;
   BasicBlock *CommonExit = nullptr;
 
@@ -1412,10 +1429,24 @@
   // Find inputs to, outputs from the code region.
   findInputsOutputs(inputs, outputs, SinkingCands);
 
-  // Now sink all instructions which only have non-phi uses inside the region
-  for (auto *II : SinkingCands)
-    cast<Instruction>(II)->moveBefore(*newFuncRoot,
-                                      newFuncRoot->getFirstInsertionPt());
+  // Now sink all instructions which only have non-phi uses inside the region.
+  // Group the allocas at the start of the block, so that any bitcast uses of
+  // the allocas are well-defined.
+  AllocaInst *FirstSunkAlloca = nullptr;
+  for (auto *II : SinkingCands) {
+    if (auto *AI = dyn_cast<AllocaInst>(II)) {
+      AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+      if (!FirstSunkAlloca)
+        FirstSunkAlloca = AI;
+    }
+  }
+  assert((SinkingCands.empty() || FirstSunkAlloca) &&
+         "Did not expect a sink candidate without any allocas");
+  for (auto *II : SinkingCands) {
+    if (!isa<AllocaInst>(II)) {
+      cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+    }
+  }
 
   if (!HoistingCands.empty()) {
     auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
Index: llvm/test/Transforms/CodeExtractor/live_shrink_multiple.ll
===================================================================
--- llvm/test/Transforms/CodeExtractor/live_shrink_multiple.ll
+++ llvm/test/Transforms/CodeExtractor/live_shrink_multiple.ll
@@ -45,9 +45,9 @@
 ; CHECK-LABEL: define internal void @_Z3foov.1.
 ; CHECK: newFuncRoot:
 ; CHECK-NEXT: alloca
+; CHECK-NEXT: alloca
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
-; CHECK-NEXT: alloca
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8
 ; CHECK: call void @llvm.lifetime.end.p0i8
Index: llvm/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll
@@ -0,0 +1,74 @@
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+@c = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
+
+declare i32* @m()
+
+; CHECK-LABEL: define void @main()
+; CHECK-NEXT: %.sroa.4.i = alloca [20 x i8], align 2
+; CHECK-NEXT: %.sroa.5.i = alloca [6 x i8], align 8
+; CHECK-NEXT: %1 = bitcast [6 x i8]* %.sroa.5.i to i8*
+
+define void @main() #0 {
+  %.sroa.4.i = alloca [20 x i8], align 2
+  %.sroa.5.i = alloca [6 x i8], align 8
+  %1 = bitcast [6 x i8]* %.sroa.5.i to i8*
+  %2 = load i32, i32* @h, align 4, !tbaa !4
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %12, label %4
+
+4:                                                ; preds = %0
+  %5 = call i32* @m() #3
+  %.sroa.4.0..sroa_idx21.i = getelementptr inbounds [20 x i8], [20 x i8]* %.sroa.4.i, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3
+  %.sroa.5.0..sroa_idx16.i = getelementptr inbounds [6 x i8], [6 x i8]* %.sroa.5.i, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3
+  call void @llvm.memset.p0i8.i64(i8* align 2 %.sroa.4.0..sroa_idx21.i, i8 0, i64 20, i1 false) #3
+  call void @llvm.memset.p0i8.i64(i8* align 8 %.sroa.5.0..sroa_idx16.i, i8 0, i64 6, i1 false) #3
+  %6 = load i32, i32* @c, align 4, !tbaa !4
+  %7 = trunc i32 %6 to i16
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3
+  call void @llvm.lifetime.end.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3
+  call void @llvm.lifetime.start.p0i8(i64 6, i8* %1) #3
+  call void @llvm.memset.p0i8.i64(i8* align 1 %1, i8 3, i64 6, i1 false)
+  br label %8
+
+8:                                                ; preds = %8, %4
+  %.0.i = phi i32 [ 0, %4 ], [ %10, %8 ]
+  %9 = sext i32 %.0.i to i64
+  %10 = add nsw i32 %.0.i, 1
+  %11 = icmp slt i32 %10, 6
+  br i1 %11, label %8, label %l.exit
+
+l.exit:                                           ; preds = %8
+  call void @llvm.lifetime.end.p0i8(i64 6, i8* %1) #3
+  br label %12
+
+12:                                               ; preds = %l.exit, %0
+  %13 = phi i1 [ true, %0 ], [ true, %l.exit ]
+  ret void
+}
+
+attributes #0 = { cold }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 14]}
+!1 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{i32 7, !"PIC Level", i32 2}
+!3 = !{!"Apple clang version 11.0.0 (clang-1100.0.20.17)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}