diff --git a/bolt/include/bolt/Core/AddressMap.h b/bolt/include/bolt/Core/AddressMap.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Core/AddressMap.h @@ -0,0 +1,43 @@ +#ifndef BOLT_CORE_ADDRESS_MAP_H +#define BOLT_CORE_ADDRESS_MAP_H + +#include "llvm/ADT/StringRef.h" + +#include +#include + +namespace llvm { + +class MCStreamer; + +namespace bolt { + +class BinaryContext; + +class AddressMap { + using MapTy = std::unordered_multimap; + MapTy Map; + +public: + static const char *const SectionName; + + static void emit(MCStreamer &Streamer, BinaryContext &BC); + static AddressMap parse(StringRef Buffer); + + std::optional lookup(uint64_t InputAddress) const { + auto It = Map.find(InputAddress); + if (It != Map.end()) + return It->second; + return std::nullopt; + } + + std::pair + lookupAll(uint64_t InputAddress) const { + return Map.equal_range(InputAddress); + } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -100,16 +100,6 @@ using LocSymsTy = std::vector>; std::unique_ptr LocSyms; - /// After output/codegen, map output offsets of instructions in this basic - /// block to instruction offsets in the original function. Note that the - /// output basic block could be different from the input basic block. - /// We only map instruction of interest, such as calls and markers. - /// - /// We store the offset array in a basic block to facilitate BAT tables - /// generation. Otherwise, the mapping could be done at function level. - using OffsetTranslationTableTy = std::vector>; - std::unique_ptr OffsetTranslationTable; - /// Alignment requirements for the block. uint32_t Alignment{1}; @@ -828,8 +818,7 @@ return OutputAddressRange; } - /// Update addresses of special instructions inside this basic block. 
- void updateOutputValues(const MCAsmLayout &Layout); + bool hasLocSyms() const { return LocSyms != nullptr; } /// Return mapping of input offsets to symbols in the output. LocSymsTy &getLocSyms() { @@ -841,19 +830,6 @@ return const_cast(this)->getLocSyms(); } - /// Return offset translation table for the basic block. - OffsetTranslationTableTy &getOffsetTranslationTable() { - return OffsetTranslationTable - ? *OffsetTranslationTable - : *(OffsetTranslationTable = - std::make_unique()); - } - - /// Return offset translation table for the basic block. - const OffsetTranslationTableTy &getOffsetTranslationTable() const { - return const_cast(this)->getOffsetTranslationTable(); - } - /// Return size of the basic block in the output binary. uint64_t getOutputSize() const { return OutputAddressRange.second - OutputAddressRange.first; diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -13,6 +13,7 @@ #ifndef BOLT_CORE_BINARY_CONTEXT_H #define BOLT_CORE_BINARY_CONTEXT_H +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryData.h" #include "bolt/Core/BinarySection.h" #include "bolt/Core/DebugData.h" @@ -221,6 +222,9 @@ bool ContainsDwarf5{false}; bool ContainsDwarfLegacy{false}; + /// Mapping from input to output addresses. + std::optional IOAddressMap; + /// Preprocess DWO debug information. 
void preprocessDWODebugInfo(); @@ -1330,6 +1334,12 @@ /* DWARFMustBeAtTheEnd */ false)); return Streamer; } + + void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); } + const AddressMap &getIOAddressMap() const { + assert(IOAddressMap && "Address map not set yet"); + return *IOAddressMap; + } }; template > diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -573,9 +573,6 @@ /// Count the number of functions created. static uint64_t Count; - /// Map offsets of special instructions to addresses in the output. - InputOffsetToAddressMapTy InputOffsetToAddressMap; - /// Register alternative function name. void addAlternativeName(std::string NewName) { Aliases.push_back(std::move(NewName)); @@ -1222,13 +1219,6 @@ /// Update output values of the function based on the final \p Layout. void updateOutputValues(const MCAsmLayout &Layout); - /// Return mapping of input to output addresses. Most users should call - /// translateInputToOutputAddress() for address translation. - InputOffsetToAddressMapTy &getInputOffsetToAddressMap() { - assert(isEmitted() && "cannot use address mapping before code emission"); - return InputOffsetToAddressMap; - } - /// Register relocation type \p RelType at a given \p Address in the function /// against \p Symbol. /// Assert if the \p Address is not inside this function. @@ -2161,6 +2151,11 @@ /// its code emission. bool requiresAddressTranslation() const; + /// Return true if the linker needs to generate an address map for this + /// function. Used for keeping track of the mapping from input to output + /// addresses of basic blocks. + bool requiresAddressMap() const; + /// Adjust branch instructions to match the CFG. 
/// /// As it comes to internal branches, the CFG represents "the ultimate source diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -116,7 +116,7 @@ /// emitted for the start of the BB. More entries may be emitted to cover /// the location of calls or any instruction that may change control flow. void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, - uint64_t FuncAddress); + uint64_t FuncAddress, const BinaryContext &BC); std::map Maps; diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -379,6 +379,10 @@ /// Return true if the section holds linux kernel symbol information. static bool isKSymtabSection(StringRef SectionName); + /// Return true if the section is only used while linking and will not appear + /// in the output file. + static bool isLinkOnlySection(StringRef SectionName); + /// Adds Debug section to overwrite. 
static void addToDebugSectionsToOverwrite(const char *Section) { DebugSectionsToOverwrite.emplace_back(Section); diff --git a/bolt/lib/Core/AddressMap.cpp b/bolt/lib/Core/AddressMap.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Core/AddressMap.cpp @@ -0,0 +1,57 @@ +#include "bolt/Core/AddressMap.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +namespace bolt { + +const char *const AddressMap::SectionName = ".bolt.address_map"; + +static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress, + const MCSymbol *OutputLabel) { + Streamer.emitIntValue(InputAddress, 8); + Streamer.emitSymbolValue(OutputLabel, 8); +} + +void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) { + Streamer.switchSection(BC.getDataSection(SectionName)); + + for (const auto &[BFAddress, BF] : BC.getBinaryFunctions()) { + if (!BF.requiresAddressMap()) + continue; + + for (const auto &BB : BF) { + if (!BB.getLabel()->isDefined()) + continue; + + emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first, + BB.getLabel()); + + if (!BB.hasLocSyms()) + continue; + + for (auto [Offset, Symbol] : BB.getLocSyms()) + emitLabel(Streamer, BFAddress + Offset, Symbol); + } + } +} + +AddressMap AddressMap::parse(StringRef Buffer) { + assert(Buffer.size() % 16 == 0 && "Unexpected address map size"); + + AddressMap Parsed; + Parsed.Map.reserve(Buffer.size() / 16); + const auto *const MapData = Buffer.data(); + + for (size_t I = 0; I < Buffer.size(); I += 16) { + const auto Input = support::endian::read64le(MapData + I); + const auto Output = support::endian::read64le(MapData + I + 8); + Parsed.Map.insert({Input, Output}); + } + + return Parsed; +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -613,27 +613,5 @@ return NewBlock; } -void 
BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) { - if (!LocSyms) - return; - - const uint64_t BBAddress = getOutputAddressRange().first; - const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel()); - for (const auto &LocSymKV : *LocSyms) { - const uint32_t InputFunctionOffset = LocSymKV.first; - const uint32_t OutputOffset = static_cast( - Layout.getSymbolOffset(*LocSymKV.second) - BBOffset); - getOffsetTranslationTable().emplace_back( - std::make_pair(OutputOffset, InputFunctionOffset)); - - // Update reverse (relative to BAT) address lookup table for function. - if (getFunction()->requiresAddressTranslation()) { - getFunction()->getInputOffsetToAddressMap().emplace( - std::make_pair(InputFunctionOffset, OutputOffset + BBAddress)); - } - } - LocSyms.reset(nullptr); -} - } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2845,6 +2845,14 @@ return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe(); } +bool BinaryFunction::requiresAddressMap() const { + if (isInjected()) + return false; + + return opts::UpdateDebugSections || isMultiEntry() || + requiresAddressTranslation(); +} + uint64_t BinaryFunction::getInstructionCount() const { uint64_t Count = 0; for (const BinaryBasicBlock &BB : blocks()) @@ -4110,15 +4118,13 @@ assert(FragmentBaseAddress == getOutputAddress()); } - const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel()); - const uint64_t BBAddress = FragmentBaseAddress + BBOffset; + const uint64_t BBAddress = + *BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress()); BB->setOutputStartAddress(BBAddress); if (PrevBB) PrevBB->setOutputEndAddress(BBAddress); PrevBB = BB; - - BB->updateOutputValues(Layout); } PrevBB->setOutputEndAddress(PrevBB->isSplit() @@ -4171,9 +4177,8 @@ // Check if the address is associated with an instruction that is tracked // by address 
translation. - auto KV = InputOffsetToAddressMap.find(Address - getAddress()); - if (KV != InputOffsetToAddressMap.end()) - return KV->second; + if (auto OutputAddress = BC.getIOAddressMap().lookup(Address)) + return *OutputAddress; // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay // intact. Instead we can use pseudo instructions and/or annotations. diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -11,6 +11,7 @@ ) add_llvm_library(LLVMBOLTCore + AddressMap.cpp BinaryBasicBlock.cpp BinaryContext.cpp BinaryData.cpp diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -20,7 +20,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, - uint64_t FuncAddress) { + uint64_t FuncAddress, + const BinaryContext &BC) { const uint64_t BBOutputOffset = BB.getOutputAddressRange().first - FuncAddress; const uint32_t BBInputOffset = BB.getInputOffset(); @@ -46,9 +47,11 @@ // allowing it to overwrite the previously inserted key in the map. Map[BBOutputOffset] = BBInputOffset; - for (const auto &IOPair : BB.getOffsetTranslationTable()) { - const uint64_t OutputOffset = IOPair.first + BBOutputOffset; - const uint32_t InputOffset = IOPair.second; + for (const auto &[InputOffset, Sym] : BB.getLocSyms()) { + const auto InputAddress = BB.getFunction()->getAddress() + InputOffset; + const auto OutputAddress = BC.getIOAddressMap().lookup(InputAddress); + assert(OutputAddress && "Unknown instruction address"); + const auto OutputOffset = *OutputAddress - FuncAddress; // Is this the first instruction in the BB? No need to duplicate the entry. 
if (OutputOffset == BBOutputOffset) @@ -77,7 +80,7 @@ MapTy Map; for (const BinaryBasicBlock *const BB : Function.getLayout().getMainFragment()) - writeEntriesForBB(Map, *BB, Function.getOutputAddress()); + writeEntriesForBB(Map, *BB, Function.getOutputAddress(), BC); Maps.emplace(Function.getOutputAddress(), std::move(Map)); if (!Function.isSplit()) @@ -89,7 +92,7 @@ Function.getLayout().getSplitFragments()) { Map.clear(); for (const BinaryBasicBlock *const BB : FF) - writeEntriesForBB(Map, *BB, FF.getAddress()); + writeEntriesForBB(Map, *BB, FF.getAddress(), BC); Maps.emplace(FF.getAddress(), std::move(Map)); ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress()); diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -183,9 +183,7 @@ // A call probe may be duplicated due to ICP // Go through output of InputOffsetToAddressMap to collect all related // probes - const InputOffsetToAddressMapTy &Offset2Addr = - F->getInputOffsetToAddressMap(); - auto CallOutputAddresses = Offset2Addr.equal_range(Offset); + auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); auto CallOutputAddress = CallOutputAddresses.first; if (CallOutputAddress == CallOutputAddresses.second) { Probe->setAddress(INT64_MAX); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Rewrite/RewriteInstance.h" +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryEmitter.h" #include "bolt/Core/BinaryFunction.h" @@ -3190,6 +3191,7 @@ } emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); + AddressMap::emit(*Streamer, *BC); Streamer->finish(); if 
(Streamer->getContext().hadError()) { @@ -3569,6 +3571,9 @@ } for (BinarySection &Section : BC->allocatableSections()) { + if (isLinkOnlySection(Section.getName())) + continue; + if (!Section.hasValidSectionID()) continue; @@ -3631,6 +3636,12 @@ } void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { + if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) { + auto Map = AddressMap::parse(MapSection->getOutputContents()); + BC->setIOAddressMap(std::move(Map)); + BC->deregisterSection(*MapSection); + } + for (BinaryFunction *Function : BC->getAllBinaryFunctions()) Function->updateOutputValues(Layout); } @@ -4018,6 +4029,9 @@ continue; } + if (isLinkOnlySection(Section.getName())) + continue; + if (opts::Verbosity >= 1) outs() << "BOLT-INFO: writing section header for " << Section.getOutputName() << '\n'; @@ -5276,6 +5290,8 @@ for (BinarySection &Section : BC->allocatableSections()) { if (!Section.isFinalized() || !Section.getOutputData()) continue; + if (isLinkOnlySection(Section.getName())) + continue; if (opts::Verbosity >= 1) outs() << "BOLT: writing new section " << Section.getName() @@ -5472,3 +5488,10 @@ return false; } + +bool RewriteInstance::isLinkOnlySection(StringRef SectionName) { + if (SectionName == AddressMap::SectionName) + return true; + + return false; +}