diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1046,9 +1046,14 @@ /// Return handler and action info for invoke instruction if present. Optional getEHInfo(const MCInst &Inst) const; - // Add handler and action info for call instruction. + /// Add handler and action info for call instruction. void addEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP); + /// Update exception-handling info for the invoke instruction \p Inst. + /// Return true on success and false otherwise, e.g. if the instruction is + /// not an invoke. + bool updateEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP); + /// Return non-negative GNU_args_size associated with the instruction /// or -1 if there's no associated info. int64_t getGnuArgsSize(const MCInst &Inst) const; diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h --- a/bolt/include/bolt/Passes/SplitFunctions.h +++ b/bolt/include/bolt/Passes/SplitFunctions.h @@ -31,6 +31,13 @@ /// Split function body into fragments. void splitFunction(BinaryFunction &Function); + /// Create trampoline landing pads for exception handling code to guarantee + /// that every landing pad is placed in the same function fragment as the + /// corresponding thrower block. The trampoline landing pad, when created, + /// will redirect the execution to the real landing pad in a different + /// fragment. + void createEHTrampolines(BinaryFunction &Function) const; + std::atomic SplitBytesHot{0ull}; std::atomic SplitBytesCold{0ull}; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -912,8 +912,8 @@ // defined in the same section and hence cannot place the landing pad into a // cold fragment when the corresponding call site is in the hot fragment. 
// Because of this issue and the previously described issue of possible - // zero-offset landing pad we disable splitting of exception-handling - // code for shared objects. + // zero-offset landing pad we have to place landing pads in the same section + // as the corresponding invokes for shared objects. std::function emitLandingPad; if (BC.HasFixedLoadAddress) { Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format @@ -925,8 +925,6 @@ Streamer.emitSymbolValue(LPSymbol, 4); }; } else { - assert(!EmitColdPart && - "cannot have exceptions in cold fragment for shared object"); Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format emitLandingPad = [&](const MCSymbol *LPSymbol) { if (!LPSymbol) diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -159,6 +159,22 @@ } }
+/// Replace the landing pad and action recorded on invoke \p Inst with \p LP.
+/// Instructions that are not invokes are left untouched.
+/// \returns true if the annotations were updated, false otherwise.
+bool MCPlusBuilder::updateEHInfo(MCInst &Inst, const MCLandingPad &LP) {
+  if (!isInvoke(Inst))
+    return false;
+
+  // The landing pad symbol pointer and the action value are both stored as
+  // integer annotation operands.
+  setAnnotationOpValue(Inst, MCAnnotation::kEHLandingPad,
+                       reinterpret_cast<int64_t>(LP.first));
+  setAnnotationOpValue(Inst, MCAnnotation::kEHAction,
+                       static_cast<int64_t>(LP.second));
+  return true;
+}
+
int64_t MCPlusBuilder::getGnuArgsSize(const MCInst &Inst) const { Optional Value = getAnnotationOpValue(Inst, MCAnnotation::kGnuArgsSize); diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -118,7 +118,7 @@ bool AllCold = true; for (BinaryBasicBlock *BB : BF.layout()) { - uint64_t ExecCount = BB->getExecutionCount(); + const uint64_t ExecCount = BB->getExecutionCount(); if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) return; if (ExecCount != 0) @@ -140,12 +140,12 @@ << " pre-split is <0x" << Twine::utohexstr(OriginalHotSize) << ", 0x" << Twine::utohexstr(ColdSize) << ">\n"); - } - - if (opts::SplitFunctions == SplitFunctions::ST_LARGE &&
!BC.HasRelocations) { - // Split only if the function wouldn't fit. - if (OriginalHotSize <= BF.getMaxSize()) - return; + if (opts::SplitFunctions == SplitFunctions::ST_LARGE && + !BC.HasRelocations) { + // Split only if the function wouldn't fit. + if (OriginalHotSize <= BF.getMaxSize()) + return; + } } // Never outline the first basic block. @@ -164,9 +164,9 @@ BB->setCanOutline(false); continue; } + if (BF.hasEHRanges() && !opts::SplitEH) { - // We cannot move landing pads (or rather entry points for landing - // pads). + // We cannot move landing pads (or rather entry points for landing pads). if (BB->isLandingPad()) { BB->setCanOutline(false); continue; @@ -176,7 +176,7 @@ // that the block never throws, it is safe to move the block to // decrease the size of the function. for (MCInst &Instr : *BB) { - if (BF.getBinaryContext().MIB->isInvoke(Instr)) { + if (BC.MIB->isInvoke(Instr)) { BB->setCanOutline(false); break; } @@ -214,6 +214,12 @@ BB->setIsCold(true); } + // For shared objects, place invoke instructions and corresponding landing + // pads in the same fragment. To reduce hot code size, create trampoline + // landing pads that will redirect the execution to the real LP. + if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) + createEHTrampolines(BF); + // Check the new size to see if it's worth splitting the function. if (BC.isX86() && BF.isSplit()) { std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); @@ -237,5 +243,75 @@ } }
+/// Create trampoline landing pads so that every invoke in \p BF refers to a
+/// landing pad placed in the same fragment (hot or cold) as the invoke.
+///
+/// When an invoke and its landing pad are in different fragments, the invoke
+/// is redirected to a trampoline block created in the invoke's fragment whose
+/// only successor is the real landing pad. Invokes sharing a landing pad also
+/// share its trampoline. If any trampoline was created, the layout is
+/// re-sorted by fragment, branches are fixed and landing pads are recomputed.
+void SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
+  const auto &MIB = BF.getBinaryContext().MIB;
+
+  // Map real landing pads to the corresponding trampolines.
+  std::unordered_map<const MCSymbol *, const MCSymbol *> LPTrampolines;
+
+  // Iterate over the copy of basic blocks since we are adding new blocks to the
+  // function which will invalidate its iterators.
+  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
+  for (BinaryBasicBlock *BB : Blocks) {
+    for (MCInst &Instr : *BB) {
+      const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
+      if (!EHInfo || !EHInfo->first)
+        continue;
+
+      const MCSymbol *LPLabel = EHInfo->first;
+      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
+      // NOTE(review): presumably the lookup returns null for a label without a
+      // block; TODO confirm. Skip such invokes rather than dereference null.
+      if (!LPBlock || BB->isCold() == LPBlock->isCold())
+        continue;
+
+      const MCSymbol *TrampolineLabel = nullptr;
+      auto Iter = LPTrampolines.find(LPLabel);
+      if (Iter != LPTrampolines.end()) {
+        TrampolineLabel = Iter->second;
+      } else {
+        // Create a trampoline basic block in the same fragment as the thrower.
+        // Note: there's no need to insert the jump instruction, it will be
+        // added by fixBranches().
+        BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
+        TrampolineBB->setIsCold(BB->isCold());
+        TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
+        TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
+        TrampolineBB->setCFIState(LPBlock->getCFIState());
+        TrampolineLabel = TrampolineBB->getLabel();
+        LPTrampolines.emplace(LPLabel, TrampolineLabel);
+      }
+
+      // Substitute the landing pad with the trampoline.
+      MIB->updateEHInfo(Instr,
+                        MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
+    }
+  }
+
+  if (LPTrampolines.empty())
+    return;
+
+  // All trampoline blocks were added to the end of the function. Place them at
+  // the end of corresponding fragments.
+  std::stable_sort(BF.layout_begin(), BF.layout_end(),
+                   [](BinaryBasicBlock *A, BinaryBasicBlock *B) {
+                     return A->isCold() < B->isCold();
+                   });
+
+  // Conservatively introduce branch instructions.
+  BF.fixBranches();
+
+  // Update exception-handling CFG for the function.
+  BF.recomputeLandingPads();
+}
+
} // namespace bolt } // namespace llvm diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1686,11 +1686,6 @@ opts::SplitEH = false; } - if (opts::SplitEH && !BC->HasFixedLoadAddress) { - errs() << "BOLT-WARNING: disabling -split-eh for shared object\n"; - opts::SplitEH = false; - } - if (opts::StrictMode && !BC->HasRelocations) { errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " "mode\n"; diff --git a/bolt/test/runtime/X86/Inputs/exceptions_split.cpp b/bolt/test/runtime/X86/Inputs/exceptions_split.cpp --- a/bolt/test/runtime/X86/Inputs/exceptions_split.cpp +++ b/bolt/test/runtime/X86/Inputs/exceptions_split.cpp @@ -24,7 +24,7 @@ { unsigned r = 0; - uint64_t limit = (argc >= 2 ? 10 : 500000000); + uint64_t limit = (argc >= 2 ? 10 : 5000); for (uint64_t i = 0; i < limit; ++i) { i += foo(); try { diff --git a/bolt/test/runtime/X86/pie-exceptions-split.test b/bolt/test/runtime/X86/pie-exceptions-split.test new file mode 100644 --- /dev/null +++ b/bolt/test/runtime/X86/pie-exceptions-split.test @@ -0,0 +1,29 @@ +## Check that BOLT successfully splits C++ exception-handling code for +## PIEs or shared objects. + +REQUIRES: system-linux + +RUN: %clangxx %cxxflags -pie -fPIC %p/Inputs/exceptions_split.cpp -Wl,-q -o %t +RUN: llvm-bolt %t -o %t.instr --instrument --instrumentation-file=%t.fdata + +## Record profile with invocation that does not throw exceptions. +RUN: %t.instr + +RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \ +RUN: --split-functions=1 --split-eh --print-after-lowering \ +RUN: --print-only=main 2>&1 | FileCheck %s + +## All calls to printf() should be from exception handling code that was +## recorded as cold during the profile collection run. Check that the calls +## are placed after the split point.
+CHECK-NOT: callq printf +CHECK: HOT-COLD SPLIT POINT +CHECK: callq printf + +## Verify the output still executes correctly when the exception path is being +## taken. +RUN: %t.bolt arg1 arg2 arg3 2>&1 | FileCheck --check-prefix=CHECK-BOLTED %s + +CHECK-BOLTED: catch 2 +CHECK-BOLTED-NEXT: catch 1 +