diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -2277,7 +2277,11 @@ } /// Process LSDA information for the function. - void parseLSDA(ArrayRef LSDAData, uint64_t LSDAAddress); + /// Run in two phases: + /// (a) Validate landing pad targets --> CheckOnly + /// (b) Skip validation, process LSDA only --> !CheckOnly + void parseLSDA(ArrayRef LSDAData, uint64_t LSDAAddress, + bool CheckOnly); /// Update exception handling ranges for the function. void updateEHRanges(); diff --git a/bolt/include/bolt/Core/Exceptions.h b/bolt/include/bolt/Core/Exceptions.h --- a/bolt/include/bolt/Core/Exceptions.h +++ b/bolt/include/bolt/Core/Exceptions.h @@ -38,6 +38,7 @@ public: explicit CFIReaderWriter(const DWARFDebugFrame &EHFrame); + void extractLSDAAddress(BinaryFunction &Function) const; bool fillCFIInfoFor(BinaryFunction &Function) const; /// Generate .eh_frame_hdr from old and new .eh_frame sections. diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1884,8 +1884,12 @@ if (!EHInfo || !EHInfo->first) continue; - BinaryBasicBlock *LPBlock = getBasicBlockForLabel(EHInfo->first); - if (!BBLandingPads.count(LPBlock)) { + // For cross-fragment LPs, LPBlock = nullptr if LPFunc not yet buildCFG + // Solution: after buildCFG for all functions, rerun recomputeLandingPads + const MCSymbol *LPLabel = EHInfo->first; + BinaryFunction *LPFunc = BC.getFunctionForSymbol(LPLabel); + BinaryBasicBlock *LPBlock = LPFunc->getBasicBlockForLabel(LPLabel); + if (LPBlock != nullptr && !BBLandingPads.count(LPBlock)) { BBLandingPads.insert(LPBlock); BB->LandingPads.emplace_back(LPBlock); LPBlock->Throwers.emplace_back(BB); diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -99,7 +99,7 @@ // // Note: some functions have LSDA entries with 0 call site entries. void BinaryFunction::parseLSDA(ArrayRef LSDASectionData, - uint64_t LSDASectionAddress) { + uint64_t LSDASectionAddress, bool CheckOnly) { assert(CurrentState == State::Disassembled && "unexpected function state"); if (!getLSDAAddress()) @@ -128,7 +128,7 @@ TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); } - if (opts::PrintExceptions) { + if (opts::PrintExceptions && !CheckOnly) { outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) << " for function " << *this << "]:\n"; outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) @@ -158,7 +158,7 @@ uint64_t CallSitePtr = CallSiteTableStart; uint64_t ActionTableStart = CallSiteTableEnd; - if (opts::PrintExceptions) { + if (opts::PrintExceptions && !CheckOnly) { outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; outs() << "CallSite table length = " << CallSiteTableLength << '\n'; outs() << '\n'; @@ -177,56 +177,90 @@ uint64_t LPOffset = LPStart + LandingPad; uint64_t LPAddress = Address + LPOffset; + BinaryFunction *Fragment = BC.getBinaryFunctionContainingAddress(LPAddress); + // --------------------- Validate split landing pad --------------------- // Verify if landing pad code is located outside current function - // Support landing pad to builtin_unreachable - if (LPAddress < Address || LPAddress > Address + getSize()) { - BinaryFunction *Fragment = - BC.getBinaryFunctionContainingAddress(LPAddress); + // Otherwise, establish parent-fragment relation + // Skip case where landing pad targets builtin_unreachable + if ((LPAddress < Address || LPAddress > Address + getSize()) && CheckOnly) { + // Assume landing pad not target another fragment's builtin_unreachable + // If this assumption is violated, must run a global check first assert(Fragment != nullptr && "BOLT-ERROR: cannot find landing pad fragment"); + + // Update IsFragment: + // In stripped mode, always trust LSDA, consider the function that + // contains LP as a fragment + // In non-stripped mode, use pattern matching (adjustFunctionBoundaries) + if (BC.IsStripped) + Fragment->IsFragment = true; + + // Update parent-fragment relation, add Fragment as secondary entry of + // the current function, not an independent function BC.addInterproceduralReference(this, Fragment->getAddress()); BC.processInterproceduralReferences(); - auto isFragmentOf = [](BinaryFunction *Fragment, - BinaryFunction *Parent) -> bool { - return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); - }; - assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) && - "BOLT-ERROR: cannot have landing pads in different " - "functions"); + + // In stripped mode, parent-fragment is always established --> skip check + // In non-stripped mode, parent-fragment depends on symbol name --> check + if (!BC.IsStripped) { + auto isFragmentOf = [](BinaryFunction *Fragment, + BinaryFunction *Parent) -> bool { + return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); + }; + assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) && + "BOLT-ERROR: cannot have landing pads in different functions"); + } + // Mark that this fragment reaches LP in another fragment of same function setHasIndirectTargetToSplitFragment(true); BC.addFragmentsToSkip(this); - return; } - if (opts::PrintExceptions) { - outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) - << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) - << "); landing pad: 0x" << Twine::utohexstr(LPOffset) - << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; - outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) - << '\n'; - } + if (CheckOnly) + continue; + // ---------------------------------------------------------------------- // Create a handler entry if necessary. - MCSymbol *LPSymbol = nullptr; - if (LPOffset) { - if (!getInstructionAtOffset(LPOffset)) { + MCSymbol *LPLabel = nullptr; + + // Special case, consider builtin_unreachable as part of this function + if (LPAddress == Address + getSize()) + Fragment = this; + + // Assumption: landing pad cannot target current fragment entry + // Note: landing pad can target other fragment entry -> split landing pad + if (LPAddress != Address) { + uint64_t FragmentOffset = LPAddress - Fragment->getAddress(); + if (!Fragment->getInstructionAtOffset(FragmentOffset)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset) - << " not pointing to an instruction in function " << *this + << " not pointing to an instruction in function " << *Fragment << " - ignoring.\n"; } else { - auto Label = Labels.find(LPOffset); - if (Label != Labels.end()) { - LPSymbol = Label->second; - } else { - LPSymbol = BC.Ctx->createNamedTempSymbol("LP"); - Labels[LPOffset] = LPSymbol; - } + // Treat split landing pad as the fragment's secondary fragment + auto Label = Fragment->Labels.find(FragmentOffset); + LPLabel = (Label != Fragment->Labels.end()) + ? Label->second + : ((Fragment != this) + ? Fragment->addEntryPointAtOffset(FragmentOffset) + : BC.Ctx->createNamedTempSymbol("LP")); + // Support recomputeLandingPad to identify split landing pad + BC.setSymbolToFunctionMap(LPLabel, Fragment); + Labels[LPOffset] = LPLabel; } } + if (opts::PrintExceptions) { + outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) + << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) + << "); landing pad: 0x" << Twine::utohexstr(LPOffset) + << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; + outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) + << '\n'; + if (LPLabel != nullptr) + outs() << " landing pad label: " << LPLabel->getName() << "\n"; + } + // Mark all call instructions in the range. auto II = Instructions.find(Start); auto IE = Instructions.end(); @@ -240,7 +274,7 @@ // Add extra operands to a call instruction making it an invoke from // now on. BC.MIB->addEHInfo(Instruction, - MCPlus::MCLandingPad(LPSymbol, ActionEntry)); + MCPlus::MCLandingPad(LPLabel, ActionEntry)); } ++II; } while (II != IE && II->first < Start + Length); @@ -506,16 +540,26 @@ } } -bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { +void CFIReaderWriter::extractLSDAAddress(BinaryFunction &Function) const { uint64_t Address = Function.getAddress(); auto I = FDEs.find(Address); // Ignore zero-length FDE ranges. if (I == FDEs.end() || !I->second->getAddressRange()) - return true; + return; const FDE &CurFDE = *I->second; Optional LSDA = CurFDE.getLSDAAddress(); Function.setLSDAAddress(LSDA ? *LSDA : 0); +} + +bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { + uint64_t Address = Function.getAddress(); + auto I = FDEs.find(Address); + // Ignore zero-length FDE ranges. + if (I == FDEs.end() || !I->second->getAddressRange()) + return true; + + const FDE &CurFDE = *I->second; uint64_t Offset = Function.getFirstInstructionOffset(); uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -333,8 +333,11 @@ if (!EHInfo || !EHInfo->first) continue; + // In case of split landing pad, LPFunc != BF const MCSymbol *LPLabel = EHInfo->first; - BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel); + BinaryFunction *LPFunc = + BF.getBinaryContext().getFunctionForSymbol(LPLabel); + BinaryBasicBlock *LPBlock = LPFunc->getBasicBlockForLabel(LPLabel); if (BB->isCold() == LPBlock->isCold()) continue; @@ -346,13 +349,14 @@ // Create a trampoline basic block in the same fragment as the thrower. // Note: there's no need to insert the jump instruction, it will be // added by fixBranches(). - BinaryBasicBlock *TrampolineBB = BF.addBasicBlock(); + BinaryBasicBlock *TrampolineBB = LPFunc->addBasicBlock(); TrampolineBB->setIsCold(BB->isCold()); TrampolineBB->setExecutionCount(LPBlock->getExecutionCount()); TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount()); TrampolineBB->setCFIState(LPBlock->getCFIState()); TrampolineLabel = TrampolineBB->getLabel(); LPTrampolines.insert(std::make_pair(LPLabel, TrampolineLabel)); + BF.getBinaryContext().setSymbolToFunctionMap(TrampolineLabel, LPFunc); } // Substitute the landing pad with the trampoline. diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2901,6 +2901,16 @@ } BC->processInterproceduralReferences(); + // Using LSDA to establish parent-fragment relation + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if (!Function.isSimple()) + continue; + CFIRdWrt->extractLSDAAddress(Function); + if (Function.getLSDAAddress() != 0) + Function.parseLSDA(getLSDAData(), getLSDAAddress(), true); + } + BC->populateJumpTables(); for (auto &BFI : BC->getBinaryFunctions()) { @@ -2922,9 +2932,10 @@ if (!shouldDisassemble(Function)) continue; - if (!Function.isSimple()) { - assert((!BC->HasRelocations || Function.getSize() == 0 || - Function.hasIndirectTargetToSplitFragment()) && + // If non-simple due to split jump table or split landing pad, process + // Every other non-simple cases, ignore + if (!Function.isSimple() && !Function.hasIndirectTargetToSplitFragment()) { + assert((!BC->HasRelocations || Function.getSize() == 0) && "unexpected non-simple function in relocation mode"); continue; } @@ -2942,9 +2953,8 @@ } // Parse LSDA. - if (Function.getLSDAAddress() != 0 && - !BC->getFragmentsToSkip().count(&Function)) - Function.parseLSDA(getLSDAData(), getLSDAAddress()); + if (Function.getLSDAAddress() != 0) + Function.parseLSDA(getLSDAData(), getLSDAAddress(), false); } } @@ -2978,6 +2988,14 @@ /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); BC->postProcessSymbolTable(); + + // buildCFG already executes recomputeLandingPads + // However, with split landing pad, target function might not have CFG yet + // --> Safely ignore split landing pad during buildCFG + // --> Rerun recomputeLandingPads after all functions have CFG + for (BinaryFunction *Function : BC->getAllBinaryFunctions()) + if (Function->getState() == BinaryFunction::State::CFG) + Function->recomputeLandingPads(); } void RewriteInstance::postProcessFunctions() { diff --git a/bolt/test/X86/split-landing-pad.s b/bolt/test/X86/split-landing-pad.s --- a/bolt/test/X86/split-landing-pad.s +++ b/bolt/test/X86/split-landing-pad.s @@ -8,6 +8,8 @@ # This test is written to ensure BOLT safely handle these targets, e.g., by # marking them as non-simple. # +# This test is for both stripped and non-stripped binaries +# # Steps to write this test: # - Create a copy of Inputs/src/unreachable.cpp # - Simplify bar(), focus on throw an exception @@ -25,10 +27,17 @@ # REQUIRES: system-linux # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: %clang++ %cxxflags %t.o -o %t.exe -Wl,-q -# RUN: llvm-bolt -v=3 %t.exe -o %t.out 2>&1 | FileCheck %s +# RUN: llvm-bolt -v=3 %t.exe -o %t.out -print-exceptions -print-cfg 2>&1 | FileCheck %s + +# RUN: cp %t.exe %t.tmp.exe +# RUN: llvm-strip -s %t.tmp.exe +# RUN: llvm-bolt -v=3 %t.tmp.exe -o %t.out -print-exceptions -print-cfg 2>&1 | FileCheck %s -# CHECK: BOLT-WARNING: Ignoring foo -# CHECK: BOLT-WARNING: Ignoring foo.cold.1 +# CHECK: BOLT-INFO: marking [[FOO_COLD:.+]] as a fragment of [[FOO:.+]] +# CHECK: landing pad label: [[TMP2:.+]] +# CHECK: landing pad label: [[TMP5:.+]] +# CHECK: BOLT-WARNING: Ignoring [[FOO]] +# CHECK: BOLT-WARNING: Ignoring [[FOO_COLD]] # CHECK: BOLT-WARNING: skipped 2 functions due to cold fragments .text