diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -69,6 +69,7 @@
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include
 #include
+#include
 #include
 
 #define DEBUG_TYPE "hotcoldsplit"
@@ -97,6 +98,11 @@
                     cl::desc("Name for the section containing cold functions "
                              "extracted by hot-cold splitting."));
 
+static cl::opt
+    OutlineEH("hotcoldsplit-outline-eh", cl::init(false), cl::Hidden,
+              cl::desc("Perform outlining for Itanium ABI-based"
+                       " exception handling blocks."));
+
 namespace {
 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
 // this function unless you modify the MBB version as well.
@@ -116,7 +122,7 @@
 bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI) {
   // Exception handling blocks are unlikely executed.
-  if (BB.isEHPad() || isa(BB.getTerminator()))
+  if (!OutlineEH && (BB.isEHPad() || isa(BB.getTerminator())))
     return true;
 
   // The block is cold if it calls/invokes a cold function. However, do not
@@ -340,6 +346,10 @@
   if (OutliningBenefit <= OutliningPenalty)
     return nullptr;
 
+  LLVM_DEBUG(dbgs() << "Attempting to outline region into function\n");
+  LLVM_DEBUG(dbgs() << "Region size = " << Region.size() << "\n");
+  LLVM_DEBUG(dbgs() << "Region entry block = " << Region[0]->getName() << "\n");
+
   Function *OrigF = Region[0]->getParent();
   if (Function *OutF = CE.extractCodeRegion(CEAC)) {
     User *U = *OutF->user_begin();
@@ -370,6 +380,7 @@
     return OutF;
   }
 
+  LLVM_DEBUG(dbgs() << "CodeExtractor failed to extract the region\n");
   ORE.emit([&]() {
     return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
                                     &*Region[0]->begin())
@@ -592,6 +603,68 @@
   OptimizationRemarkEmitter &ORE = (*GetORE)(F);
   AssumptionCache *AC = LookupAC(F);
 
+  // Tail blocks created by splitting landing pad blocks below; the
+  // cold-region scan marks each of them cold. While splitting, calls to
+  // eh.typeid.for are hoisted into the dominating landing pad block.
+
+  std::set LPadSuccessors;
+
+  // Split each EH pad block into the landingpad instruction and the
+  // rest of the block. Outlining can then start at the first
+  // non-landingpad instruction.
+
+  // The EH outlining strategy below only works with Itanium-style EH;
+  // WinEH outlining is not supported. Skip functions whose personality
+  // function is WinEH's (CxxFrameHandler3); otherwise attempt EH outlining.
+  if (OutlineEH && F.hasPersonalityFn() &&
+      !F.getPersonalityFn()->getName().endswith("CxxFrameHandler3")) {
+    for (BasicBlock *BB : RPOT) {
+      // Funclet-style pads (catchswitch, catchpad, cleanuppad) satisfy
+      // isEHPad() but have no landingpad instruction; skip them.
+      auto *LPad = BB->getLandingPadInst();
+      if (!LPad)
+        continue;
+
+      LLVM_DEBUG({
+        dbgs() << "Found an EH Basic Block: ";
+        BB->dump();
+        dbgs() << "------------------------\n";
+      });
+
+      if (!DT)
+        DT = std::make_unique<DominatorTree>(F);
+      // Collect every llvm.eh.typeid.for call in the blocks dominated by BB.
+      std::vector<Instruction *> EHIntrinsicCalls;
+      SmallVector<BasicBlock *, 8> Descendants;
+      DT->getDescendants(BB, Descendants);
+      for (BasicBlock *SuccBB : Descendants)
+        for (Instruction &I : *SuccBB)
+          if (const auto *CI = dyn_cast<CallInst>(&I))
+            if (CI->getIntrinsicID() == Intrinsic::eh_typeid_for)
+              EHIntrinsicCalls.push_back(&I);
+
+      // Split right after the landingpad instruction, leaving BB with the
+      // landingpad and the unconditional branch to the new block.
+      // NOTE(review): the new block is not in RPOT if RPOT was computed
+      // before this loop -- confirm the cold-region scan below can reach it.
+      BasicBlock *SplitBB = SplitBlock(BB, LPad->getNextNode(), DT.get());
+      // Hoist the collected calls into BB, just before its terminator.
+      for (Instruction *TypeIdCall : EHIntrinsicCalls)
+        TypeIdCall->moveBefore(BB->getTerminator());
+
+      LLVM_DEBUG({
+        dbgs()
+            << "EH Outliner: Split BB into lpad and lpad.split, lpad.split: ";
+        SplitBB->dump();
+        dbgs() << "---------------------\n";
+        dbgs() << "EH Outliner: lpad:";
+        BB->dump();
+        dbgs() << "---------------------\n";
+      });
+      LPadSuccessors.insert(SplitBB);
+    }
+  }
+
   // Find all cold regions.
   for (BasicBlock *BB : RPOT) {
     // This block is already part of some outlining region.
@@ -600,6 +673,21 @@
 
     bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
                 (EnableStaticAnalysis && unlikelyExecuted(*BB, PSI, BFI));
+
+    if (OutlineEH && EnableStaticAnalysis && BB->getSinglePredecessor() &&
+        BB->getSinglePredecessor()->isEHPad()) {
+      LLVM_DEBUG(dbgs() << "EH Outliner: block " << BB->getName()
+                        << " has EHPad predecessor and marked as cold\n");
+      Cold = true;
+    }
+
+    // BB is the tail block of a landing pad block that was split above.
+    if (OutlineEH && LPadSuccessors.find(BB) != LPadSuccessors.end()) {
+      LLVM_DEBUG(dbgs() << "EH Outliner: Found a LPad successor block "
+                        << BB->getName() << "\n");
+      Cold = true;
+    }
+
     if (!Cold)
       continue;
 
@@ -726,8 +814,8 @@
   return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
 }
 
-PreservedAnalyses
-HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
+PreservedAnalyses HotColdSplittingPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
   auto &FAM = AM.getResult(M).getManager();
 
   auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
diff --git a/llvm/test/Transforms/HotColdSplit/try.ll b/llvm/test/Transforms/HotColdSplit/try.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/try.ll
@@ -0,0 +1,175 @@
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-outline-eh=true < %s 2>&1 | FileCheck %s
+; CHECK: define{{.*}}@main.cold.1{{.*}}
+%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string, std::allocator>::_Alloc_hider", i64, %union.anon }
+%"struct.std::__cxx11::basic_string, std::allocator>::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+
+$__clang_call_terminate = comdat any
+
+$_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE = comdat any
+
+$_ZTINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE = comdat any
+
+@_ZTIi = external dso_local constant i8*
+@_ZTIc = external dso_local constant i8*
+@_ZTIl = external dso_local constant i8*
+@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
+@_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE = linkonce_odr dso_local constant [53 x i8] c"NSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE\00", comdat, align 1
+@_ZTINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([53 x i8], [53 x i8]* @_ZTSNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE, i32 0, i32 0) }, comdat, align 8
+
+define dso_local i32 @main() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %exn.slot = alloca i8*, align 8
+  %ehselector.slot = alloca i32, align 4
+  %s = alloca %"class.std::__cxx11::basic_string", align 8
+  %d = alloca i64, align 8
+  %c = alloca i8, align 1
+  %e = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 3, i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp eq i32 %0, 3
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %exception = call i8* @__cxa_allocate_exception(i64 4) #5
+  %1 = bitcast i8* %exception to i32*
+  %2 = load i32, i32* %i, align 4
+  store i32 %2, i32* %1, align 16
+  invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #6
+          to label %unreachable unwind label %lpad
+
+lpad:                                             ; preds = %if.then
+  %3 = landingpad { i8*, i32 } ; EH pad block that -hotcoldsplit-outline-eh splits after this instruction
+          catch i8* bitcast (i8** @_ZTIi to i8*) ; typeinfo for int
+          catch i8* bitcast (i8** @_ZTIc to i8*) ; typeinfo for char
+          catch i8* bitcast (i8** @_ZTIl to i8*) ; typeinfo for long
+          catch i8* bitcast ({ i8*, i8* }* @_ZTINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE to i8*) ; typeinfo for std::__cxx11::basic_string<char, ...>
+  %4 = extractvalue { i8*, i32 } %3, 0
+  store i8* %4, i8** %exn.slot, align 8
+  %5 = extractvalue { i8*, i32 } %3, 1
+  store i32 %5, i32* %ehselector.slot, align 4
+  br label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %lpad
+  %sel = load i32, i32* %ehselector.slot, align 4
+  %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #5 ; eh.typeid.for call in a block reached only via %lpad: candidate for hoisting into %lpad
+  %matches = icmp eq i32 %sel, %6
+  br i1 %matches, label %catch10, label %catch.fallthrough
+
+catch10:                                          ; preds = %catch.dispatch
+  %exn11 = load i8*, i8** %exn.slot, align 8
+  %7 = call i8* @__cxa_begin_catch(i8* %exn11) #5
+  %8 = bitcast i8* %7 to i32*
+  %9 = load i32, i32* %8, align 4
+  store i32 %9, i32* %e, align 4
+  %10 = load i32, i32* %i, align 4
+  call void @exit(i32 %10) #7
+  unreachable
+
+catch.fallthrough:                                ; preds = %catch.dispatch
+  %11 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIc to i8*)) #5
+  %matches1 = icmp eq i32 %sel, %11
+  br i1 %matches1, label %catch8, label %catch.fallthrough2
+
+catch8:                                           ; preds = %catch.fallthrough
+  %exn9 = load i8*, i8** %exn.slot, align 8
+  %12 = call i8* @__cxa_begin_catch(i8* %exn9) #5
+  %13 = load i8, i8* %12, align 1
+  store i8 %13, i8* %c, align 1
+  call void @exit(i32 2) #7
+  unreachable
+
+catch.fallthrough2:                               ; preds = %catch.fallthrough
+  %14 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIl to i8*)) #5
+  %matches3 = icmp eq i32 %sel, %14
+  br i1 %matches3, label %catch6, label %catch.fallthrough4
+
+catch6:                                           ; preds = %catch.fallthrough2
+  %exn7 = load i8*, i8** %exn.slot, align 8
+  %15 = call i8* @__cxa_begin_catch(i8* %exn7) #5
+  %16 = bitcast i8* %15 to i64*
+  %17 = load i64, i64* %16, align 8
+  store i64 %17, i64* %d, align 8
+  call void @exit(i32 4) #7
+  unreachable
+
+catch.fallthrough4:                               ; preds = %catch.fallthrough2
+  %18 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE to i8*)) #5
+  %matches5 = icmp eq i32 %sel, %18
+  br i1 %matches5, label %catch, label %eh.resume
+
+catch:                                            ; preds = %catch.fallthrough4
+  %exn = load i8*, i8** %exn.slot, align 8
+  %19 = call i8* @__cxa_get_exception_ptr(i8* %exn) #5
+  %20 = bitcast i8* %19 to %"class.std::__cxx11::basic_string"*
+  invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_(%"class.std::__cxx11::basic_string"* %s, %"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32) %20)
+          to label %invoke.cont unwind label %terminate.lpad
+
+invoke.cont:                                      ; preds = %catch
+  %21 = call i8* @__cxa_begin_catch(i8* %exn) #5
+  call void @exit(i32 5) #7
+  unreachable
+
+if.end:                                           ; preds = %entry
+  br label %try.cont
+
+try.cont:                                         ; preds = %if.end
+  ret i32 0
+
+eh.resume:                                        ; preds = %catch.fallthrough4
+  %exn12 = load i8*, i8** %exn.slot, align 8
+  %sel13 = load i32, i32* %ehselector.slot, align 4
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn12, 0
+  %lpad.val14 = insertvalue { i8*, i32 } %lpad.val, i32 %sel13, 1
+  resume { i8*, i32 } %lpad.val14
+
+terminate.lpad:                                   ; preds = %catch
+  %22 = landingpad { i8*, i32 } ; second landing pad, reached only from the invoke in %catch
+          catch i8* null ; null typeinfo: catch-all clause
+  %23 = extractvalue { i8*, i32 } %22, 0
+  call void @__clang_call_terminate(i8* %23) #7
+  unreachable
+
+unreachable:                                      ; preds = %if.then
+  unreachable
+}
+
+declare dso_local i8* @__cxa_allocate_exception(i64)
+
+declare dso_local void @__cxa_throw(i8*, i8*, i8*)
+
+declare dso_local i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare dso_local i8* @__cxa_get_exception_ptr(i8*)
+
+declare dso_local void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_(%"class.std::__cxx11::basic_string"*, %"class.std::__cxx11::basic_string"* nonnull align 8 dereferenceable(32)) unnamed_addr #2
+
+; Function Attrs: noreturn nounwind
+define linkonce_odr hidden void @__clang_call_terminate(i8* %0) #3 comdat {
+  %2 = call i8* @__cxa_begin_catch(i8* %0) #5
+  call void @_ZSt9terminatev() #7
+  unreachable
+}
+
+declare dso_local i8* @__cxa_begin_catch(i8*)
+
+declare dso_local void @_ZSt9terminatev()
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @exit(i32) #4
+
+attributes #0 = { norecurse uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { noreturn nounwind }
+attributes #4 = { noreturn nounwind }
+attributes #5 = { nounwind }
+attributes #6 = { noreturn }
+attributes #7 = { noreturn nounwind }
+