diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -2276,8 +2276,18 @@ } } - /// Process LSDA information for the function. - void parseLSDA(ArrayRef LSDAData, uint64_t LSDAAddress); + /// Process LSDA information for the function: + /// SiblingValidation: + /// Must run before branch target analysis + /// Process CallSiteTable to infer siblings for stripped binaries + /// Process CallSiteTable to validate siblings for nonstripped binaries + /// !SiblingValidation: + /// Must run after disassembly and branch target analysis + /// Process CallSiteTable to register labels for landing pad + /// Process ActionTable + /// Process TypesTable + void parseLSDA(ArrayRef LSDAData, uint64_t LSDAAddress, + bool SiblingValidation); /// Update exception handling ranges for the function. void updateEHRanges(); diff --git a/bolt/include/bolt/Core/Exceptions.h b/bolt/include/bolt/Core/Exceptions.h --- a/bolt/include/bolt/Core/Exceptions.h +++ b/bolt/include/bolt/Core/Exceptions.h @@ -38,6 +38,7 @@ public: explicit CFIReaderWriter(const DWARFDebugFrame &EHFrame); + void fillLSDAAddressFor(BinaryFunction &Function) const; bool fillCFIInfoFor(BinaryFunction &Function) const; /// Generate .eh_frame_hdr from old and new .eh_frame sections. diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -99,8 +99,12 @@ // // Note: some functions have LSDA entries with 0 call site entries. void BinaryFunction::parseLSDA(ArrayRef LSDASectionData, - uint64_t LSDASectionAddress) { - assert(CurrentState == State::Disassembled && "unexpected function state"); + uint64_t LSDASectionAddress, + bool SiblingValidation) { + // Sibling validation occurs before branch target analysis + // Otherwise, require branch target analysis done to proceed + assert((SiblingValidation || CurrentState == State::Disassembled) && + "unexpected function state"); if (!getLSDAAddress()) return; @@ -128,7 +132,8 @@ TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding); } - if (opts::PrintExceptions) { + // Avoid printing information twice + if (opts::PrintExceptions && !SiblingValidation) { outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress()) << " for function " << *this << "]:\n"; outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding) @@ -158,7 +163,8 @@ uint64_t CallSitePtr = CallSiteTableStart; uint64_t ActionTableStart = CallSiteTableEnd; - if (opts::PrintExceptions) { + // Avoid printing information twice + if (opts::PrintExceptions && !SiblingValidation) { outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n'; outs() << "CallSite table length = " << CallSiteTableLength << '\n'; outs() << '\n'; @@ -179,100 +185,105 @@ uint64_t LPAddress = Address + LPOffset; BinaryFunction *Fragment = BC.getBinaryFunctionContainingAddress(LPAddress); - // Verify if landing pad code is located outside current function - // Skip case where landing pad targets builtin_unreachable - if (LPAddress < Address || LPAddress > Address + getSize()) { - // Assume landing pad not target another fragment's builtin_unreachable - // If this assumption is violated, must run a global check first - assert(Fragment != nullptr && - "BOLT-ERROR: cannot find landing pad fragment"); - - // Update IsFragment: - // In stripped mode, always trust LSDA, consider the function that - // contains LP as a fragment - // In non-stripped mode, use pattern matching (adjustFunctionBoundaries) - if (BC.IsStripped) - Fragment->IsFragment = true; - - // Update parent-fragment relation, add Fragment as secondary entry of - // the current function, not an independent function - BC.addInterproceduralReference(this, Fragment->getAddress()); - BC.processInterproceduralReferences(); - - // In stripped mode, parent-fragment is always established --> skip check - // In non-stripped mode, parent-fragment depends on symbol name --> check - if (!BC.IsStripped) { - auto isFragmentOf = [](BinaryFunction *Fragment, - BinaryFunction *Parent) -> bool { - return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); - }; - assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) && - "BOLT-ERROR: cannot have landing pads in different functions"); + // Sibling inference for stripped binaries + // Sibling validation for nonstripped binaries + if (SiblingValidation) { + // Verify if landing pad code is located outside current function + // Skip case where landing pad targets builtin_unreachable + if (LPAddress < Address || LPAddress > Address + getSize()) { + // Assume landing pad not target another fragment's builtin_unreachable + // If this assumption is violated, must run a global check first + assert(Fragment != nullptr && + "BOLT-ERROR: cannot find landing pad fragment"); + // Update IsFragment: + // For stripped binaries, functions containing LP are marked as sibling + // For nonstripped binaries, validate using symbol name + if (BC.IsStripped) + Fragment->IsFragment = true; + // Register siblings + // FIXME: This method print false warnings if LP is at function entry + BC.addInterproceduralReference(this, Fragment->getAddress()); + BC.processInterproceduralReferences(); + // Validate sibling relationship for nonstripped binaries only + if (!BC.IsStripped) { + auto isFragmentOf = [](BinaryFunction *Fragment, + BinaryFunction *Parent) -> bool { + return (Fragment->isFragment() && + Fragment->isParentFragment(Parent)); + }; + assert( + (isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) && + "BOLT-ERROR: cannot have landing pads in different functions"); + } + // Mark that this fragment reaches LP in another fragment of same + // function + setHasIndirectTargetToSplitFragment(true); + BC.addFragmentsToSkip(this); } - // Mark that this fragment reaches LP in another fragment of same function - setHasIndirectTargetToSplitFragment(true); - BC.addFragmentsToSkip(this); } + // Register label for split landing pad + // Update EHInfo for Call Site. This update can only run once! + else { + // Create a handler entry if necessary. + MCSymbol *LPLabel = nullptr; + + // Special case, consider builtin_unreachable as part of this function + if (LPAddress == Address + getSize()) + Fragment = this; + + // Assumption: landing pad cannot target current fragment entry + // Note: landing pad can target other fragment entry -> split landing pad + if (LPAddress != Address) { + uint64_t FragmentOffset = LPAddress - Fragment->getAddress(); + if (!Fragment->getInstructionAtOffset(FragmentOffset)) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset) + << " not pointing to an instruction in function " + << *Fragment << " - ignoring.\n"; + } else { + // Treat split landing pad as the fragment's secondary fragment + auto Label = Fragment->Labels.find(FragmentOffset); + LPLabel = (Label != Fragment->Labels.end()) + ? Label->second + : ((Fragment != this) + ? Fragment->addEntryPointAtOffset(FragmentOffset) + : BC.Ctx->createNamedTempSymbol("LP")); + // Support recomputeLandingPad to identify split landing pad + BC.setSymbolToFunctionMap(LPLabel, Fragment); + Labels[LPOffset] = LPLabel; + } + } - // Create a handler entry if necessary. - MCSymbol *LPLabel = nullptr; - - // Special case, consider builtin_unreachable as part of this function - if (LPAddress == Address + getSize()) - Fragment = this; - - // Assumption: landing pad cannot target current fragment entry - // Note: landing pad can target other fragment entry -> split landing pad - if (LPAddress != Address) { - uint64_t FragmentOffset = LPAddress - Fragment->getAddress(); - if (!Fragment->getInstructionAtOffset(FragmentOffset)) { - if (opts::Verbosity >= 1) - errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset) - << " not pointing to an instruction in function " << *Fragment - << " - ignoring.\n"; - } else { - // Treat split landing pad as the fragment's secondary fragment - auto Label = Fragment->Labels.find(FragmentOffset); - LPLabel = (Label != Fragment->Labels.end()) - ? Label->second - : ((Fragment != this) - ? Fragment->addEntryPointAtOffset(FragmentOffset) - : BC.Ctx->createNamedTempSymbol("LP")); - // Support recomputeLandingPad to identify split landing pad - BC.setSymbolToFunctionMap(LPLabel, Fragment); - Labels[LPOffset] = LPLabel; + if (opts::PrintExceptions) { + outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) + << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) + << "); landing pad: 0x" << Twine::utohexstr(LPOffset) + << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; + outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) + << '\n'; + if (LPLabel != nullptr) + outs() << " landing pad label: " << LPLabel->getName() << "\n"; } - } - if (opts::PrintExceptions) { - outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start) - << ", 0x" << Twine::utohexstr(RangeBase + Start + Length) - << "); landing pad: 0x" << Twine::utohexstr(LPOffset) - << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n"; - outs() << " current offset is " << (CallSitePtr - CallSiteTableStart) - << '\n'; - if (LPLabel != nullptr) - outs() << " landing pad label: " << LPLabel->getName() << "\n"; + // Mark all call instructions in the range. + auto II = Instructions.find(Start); + auto IE = Instructions.end(); + assert(II != IE && "exception range not pointing to an instruction"); + do { + MCInst &Instruction = II->second; + if (BC.MIB->isCall(Instruction) && + !BC.MIB->getConditionalTailCall(Instruction)) { + assert(!BC.MIB->isInvoke(Instruction) && + "overlapping exception ranges detected"); + // Add extra operands to a call instruction making it an invoke from + // now on. + BC.MIB->addEHInfo(Instruction, + MCPlus::MCLandingPad(LPLabel, ActionEntry)); + } + ++II; + } while (II != IE && II->first < Start + Length); } - // Mark all call instructions in the range. - auto II = Instructions.find(Start); - auto IE = Instructions.end(); - assert(II != IE && "exception range not pointing to an instruction"); - do { - MCInst &Instruction = II->second; - if (BC.MIB->isCall(Instruction) && - !BC.MIB->getConditionalTailCall(Instruction)) { - assert(!BC.MIB->isInvoke(Instruction) && - "overlapping exception ranges detected"); - // Add extra operands to a call instruction making it an invoke from - // now on. - BC.MIB->addEHInfo(Instruction, - MCPlus::MCLandingPad(LPLabel, ActionEntry)); - } - ++II; - } while (II != IE && II->first < Start + Length); - if (ActionEntry != 0) { auto printType = [&](int Index, raw_ostream &OS) { assert(Index > 0 && "only positive indices are valid"); @@ -534,16 +545,26 @@ } } -bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { +void CFIReaderWriter::fillLSDAAddressFor(BinaryFunction &Function) const { uint64_t Address = Function.getAddress(); auto I = FDEs.find(Address); // Ignore zero-length FDE ranges. if (I == FDEs.end() || !I->second->getAddressRange()) - return true; + return; const FDE &CurFDE = *I->second; Optional LSDA = CurFDE.getLSDAAddress(); Function.setLSDAAddress(LSDA ? *LSDA : 0); +} + +bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { + uint64_t Address = Function.getAddress(); + auto I = FDEs.find(Address); + // Ignore zero-length FDE ranges. + if (I == FDEs.end() || !I->second->getAddressRange()) + return true; + + const FDE &CurFDE = *I->second; uint64_t Offset = Function.getFirstInstructionOffset(); uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2854,6 +2854,19 @@ void RewriteInstance::disassembleFunctions() { NamedRegionTimer T("disassembleFunctions", "disassemble functions", TimerGroupName, TimerGroupDesc, opts::TimeRewrite); + + // Sibling inference for stripped binaries + // Sibling validation for nonstripped binaries + for (auto &BFI : BC->getBinaryFunctions()) { + BinaryFunction &Function = BFI.second; + if (!shouldDisassemble(Function)) + continue; + CFIRdWrt->fillLSDAAddressFor(Function); + if (Function.getLSDAAddress() != 0) + Function.parseLSDA(getLSDAData(), getLSDAAddress(), true); + } + + // Disassembly and Branch Target Analysis for (auto &BFI : BC->getBinaryFunctions()) { BinaryFunction &Function = BFI.second; @@ -2900,6 +2913,7 @@ Function.print(outs(), "after disassembly", true); } + // Post-disassembly BC->processInterproceduralReferences(); BC->populateJumpTables(); @@ -2943,9 +2957,8 @@ } // Parse LSDA. - if (Function.getLSDAAddress() != 0 && - !BC->getFragmentsToSkip().count(&Function)) - Function.parseLSDA(getLSDAData(), getLSDAAddress()); + if (Function.getLSDAAddress() != 0) + Function.parseLSDA(getLSDAData(), getLSDAAddress(), false); } }