diff --git a/bolt/include/bolt/Core/AddressMap.h b/bolt/include/bolt/Core/AddressMap.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Core/AddressMap.h @@ -0,0 +1,59 @@ +//===- bolt/Core/AddressMap.h - Input-output address map --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper class to create a mapping from input to output addresses needed for +// updating debugging symbols and BAT. We emit an MCSection containing +// pairs to the object file and JITLink will +// transform this in pairs. The linker output +// can then be parsed and used to establish the mapping. +// +//===----------------------------------------------------------------------===// +// +#ifndef BOLT_CORE_ADDRESS_MAP_H +#define BOLT_CORE_ADDRESS_MAP_H + +#include "llvm/ADT/StringRef.h" + +#include +#include + +namespace llvm { + +class MCStreamer; + +namespace bolt { + +class BinaryContext; + +class AddressMap { + using MapTy = std::unordered_multimap; + MapTy Map; + +public: + static const char *const SectionName; + + static void emit(MCStreamer &Streamer, BinaryContext &BC); + static AddressMap parse(StringRef Buffer, const BinaryContext &BC); + + std::optional lookup(uint64_t InputAddress) const { + auto It = Map.find(InputAddress); + if (It != Map.end()) + return It->second; + return std::nullopt; + } + + std::pair + lookupAll(uint64_t InputAddress) const { + return Map.equal_range(InputAddress); + } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -100,16 +100,6 @@ using LocSymsTy = std::vector>; std::unique_ptr LocSyms; - /// After output/codegen, map output offsets of instructions in this basic - /// block to instruction offsets in the original function. Note that the - /// output basic block could be different from the input basic block. - /// We only map instruction of interest, such as calls and markers. - /// - /// We store the offset array in a basic block to facilitate BAT tables - /// generation. Otherwise, the mapping could be done at function level. - using OffsetTranslationTableTy = std::vector>; - std::unique_ptr OffsetTranslationTable; - /// Alignment requirements for the block. uint32_t Alignment{1}; @@ -828,8 +818,7 @@ return OutputAddressRange; } - /// Update addresses of special instructions inside this basic block. - void updateOutputValues(const MCAsmLayout &Layout); + bool hasLocSyms() const { return LocSyms != nullptr; } /// Return mapping of input offsets to symbols in the output. LocSymsTy &getLocSyms() { @@ -841,19 +830,6 @@ return const_cast(this)->getLocSyms(); } - /// Return offset translation table for the basic block. - OffsetTranslationTableTy &getOffsetTranslationTable() { - return OffsetTranslationTable - ? *OffsetTranslationTable - : *(OffsetTranslationTable = - std::make_unique()); - } - - /// Return offset translation table for the basic block. - const OffsetTranslationTableTy &getOffsetTranslationTable() const { - return const_cast(this)->getOffsetTranslationTable(); - } - /// Return size of the basic block in the output binary. uint64_t getOutputSize() const { return OutputAddressRange.second - OutputAddressRange.first; diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -13,6 +13,7 @@ #ifndef BOLT_CORE_BINARY_CONTEXT_H #define BOLT_CORE_BINARY_CONTEXT_H +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryData.h" #include "bolt/Core/BinarySection.h" #include "bolt/Core/DebugData.h" @@ -221,6 +222,9 @@ bool ContainsDwarf5{false}; bool ContainsDwarfLegacy{false}; + /// Mapping from input to output addresses. + std::optional IOAddressMap; + /// Preprocess DWO debug information. void preprocessDWODebugInfo(); @@ -1343,6 +1347,12 @@ /* DWARFMustBeAtTheEnd */ false)); return Streamer; } + + void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); } + const AddressMap &getIOAddressMap() const { + assert(IOAddressMap && "Address map not set yet"); + return *IOAddressMap; + } }; template > diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -577,9 +577,6 @@ /// Count the number of functions created. static uint64_t Count; - /// Map offsets of special instructions to addresses in the output. - InputOffsetToAddressMapTy InputOffsetToAddressMap; - /// Register alternative function name. void addAlternativeName(std::string NewName) { Aliases.push_back(std::move(NewName)); @@ -1226,13 +1223,6 @@ /// Update output values of the function based on the final \p Layout. void updateOutputValues(const MCAsmLayout &Layout); - /// Return mapping of input to output addresses. Most users should call - /// translateInputToOutputAddress() for address translation. - InputOffsetToAddressMapTy &getInputOffsetToAddressMap() { - assert(isEmitted() && "cannot use address mapping before code emission"); - return InputOffsetToAddressMap; - } - /// Register relocation type \p RelType at a given \p Address in the function /// against \p Symbol. /// Assert if the \p Address is not inside this function. @@ -2180,6 +2170,11 @@ /// its code emission. bool requiresAddressTranslation() const; + /// Return true if the linker needs to generate an address map for this + /// function. Used for keeping track of the mapping from input to out + /// addresses of basic blocks. + bool requiresAddressMap() const; + /// Adjust branch instructions to match the CFG. /// /// As it comes to internal branches, the CFG represents "the ultimate source diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -97,6 +97,8 @@ mutable bool IsReordered{false}; // Have the contents been reordered? bool IsAnonymous{false}; // True if the name should not be included // in the output file. + bool IsLinkOnly{false}; // True if the section should not be included + // in the output file. uint64_t hash(const BinaryData &BD, std::map &Cache) const; @@ -452,6 +454,8 @@ void setIndex(uint32_t I) { Index = I; } void setOutputName(const Twine &Name) { OutputName = Name.str(); } void setAnonymous(bool Flag) { IsAnonymous = Flag; } + bool isLinkOnly() const { return IsLinkOnly; } + void setLinkOnly() { IsLinkOnly = true; } /// Emit the section as data, possibly with relocations. /// Use name \p SectionName for the section during the emission. diff --git a/bolt/lib/Core/AddressMap.cpp b/bolt/lib/Core/AddressMap.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Core/AddressMap.cpp @@ -0,0 +1,63 @@ +#include "bolt/Core/AddressMap.h" +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { +namespace bolt { + +const char *const AddressMap::SectionName = ".bolt.address_map"; + +static void emitLabel(MCStreamer &Streamer, uint64_t InputAddress, + const MCSymbol *OutputLabel) { + Streamer.emitIntValue(InputAddress, 8); + Streamer.emitSymbolValue(OutputLabel, 8); +} + +void AddressMap::emit(MCStreamer &Streamer, BinaryContext &BC) { + Streamer.switchSection(BC.getDataSection(SectionName)); + + for (const auto &[BFAddress, BF] : BC.getBinaryFunctions()) { + if (!BF.requiresAddressMap()) + continue; + + for (const auto &BB : BF) { + if (!BB.getLabel()->isDefined()) + continue; + + emitLabel(Streamer, BFAddress + BB.getInputAddressRange().first, + BB.getLabel()); + + if (!BB.hasLocSyms()) + continue; + + for (auto [Offset, Symbol] : BB.getLocSyms()) + emitLabel(Streamer, BFAddress + Offset, Symbol); + } + } +} + +AddressMap AddressMap::parse(StringRef Buffer, const BinaryContext &BC) { + const auto EntrySize = 2 * BC.AsmInfo->getCodePointerSize(); + assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size"); + + DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + DataExtractor::Cursor Cursor(0); + + AddressMap Parsed; + Parsed.Map.reserve(Buffer.size() / EntrySize); + + while (Cursor && !DE.eof(Cursor)) { + const auto Input = DE.getAddress(Cursor); + const auto Output = DE.getAddress(Cursor); + Parsed.Map.insert({Input, Output}); + } + + assert(Cursor && "Error reading address map section"); + return Parsed; +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -613,27 +613,5 @@ return NewBlock; } -void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) { - if (!LocSyms) - return; - - const uint64_t BBAddress = getOutputAddressRange().first; - const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel()); - for (const auto &LocSymKV : *LocSyms) { - const uint32_t InputFunctionOffset = LocSymKV.first; - const uint32_t OutputOffset = static_cast( - Layout.getSymbolOffset(*LocSymKV.second) - BBOffset); - getOffsetTranslationTable().emplace_back( - std::make_pair(OutputOffset, InputFunctionOffset)); - - // Update reverse (relative to BAT) address lookup table for function. - if (getFunction()->requiresAddressTranslation()) { - getFunction()->getInputOffsetToAddressMap().emplace( - std::make_pair(InputFunctionOffset, OutputOffset + BBAddress)); - } - } - LocSyms.reset(nullptr); -} - } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -214,6 +214,10 @@ } emitDataSections(OrgSecPrefix); + + // TODO Enable for Mach-O once BinaryContext::getDataSection supports it. + if (BC.isELF()) + AddressMap::emit(Streamer, BC); } void BinaryEmitter::emitFunctions() { diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2855,6 +2855,14 @@ return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe(); } +bool BinaryFunction::requiresAddressMap() const { + if (isInjected()) + return false; + + return opts::UpdateDebugSections || isMultiEntry() || + requiresAddressTranslation(); +} + uint64_t BinaryFunction::getInstructionCount() const { uint64_t Count = 0; for (const BinaryBasicBlock &BB : blocks()) @@ -4120,15 +4128,13 @@ assert(FragmentBaseAddress == getOutputAddress()); } - const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel()); - const uint64_t BBAddress = FragmentBaseAddress + BBOffset; + const uint64_t BBAddress = + *BC.getIOAddressMap().lookup(BB->getInputOffset() + getAddress()); BB->setOutputStartAddress(BBAddress); if (PrevBB) PrevBB->setOutputEndAddress(BBAddress); PrevBB = BB; - - BB->updateOutputValues(Layout); } PrevBB->setOutputEndAddress(PrevBB->isSplit() @@ -4181,9 +4187,8 @@ // Check if the address is associated with an instruction that is tracked // by address translation. - auto KV = InputOffsetToAddressMap.find(Address - getAddress()); - if (KV != InputOffsetToAddressMap.end()) - return KV->second; + if (auto OutputAddress = BC.getIOAddressMap().lookup(Address)) + return *OutputAddress; // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay // intact. Instead we can use pseudo instructions and/or annotations. diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -11,6 +11,7 @@ ) add_llvm_library(LLVMBOLTCore + AddressMap.cpp BinaryBasicBlock.cpp BinaryContext.cpp BinaryData.cpp diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -46,9 +46,14 @@ // allowing it to overwrite the previously inserted key in the map. Map[BBOutputOffset] = BBInputOffset; - for (const auto &IOPair : BB.getOffsetTranslationTable()) { - const uint64_t OutputOffset = IOPair.first + BBOutputOffset; - const uint32_t InputOffset = IOPair.second; + const auto &IOAddressMap = + BB.getFunction()->getBinaryContext().getIOAddressMap(); + + for (const auto &[InputOffset, Sym] : BB.getLocSyms()) { + const auto InputAddress = BB.getFunction()->getAddress() + InputOffset; + const auto OutputAddress = IOAddressMap.lookup(InputAddress); + assert(OutputAddress && "Unknown instruction address"); + const auto OutputOffset = *OutputAddress - FuncAddress; // Is this the first instruction in the BB? No need to duplicate the entry. if (OutputOffset == BBOutputOffset) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -183,9 +183,7 @@ // A call probe may be duplicated due to ICP // Go through output of InputOffsetToAddressMap to collect all related // probes - const InputOffsetToAddressMapTy &Offset2Addr = - F->getInputOffsetToAddressMap(); - auto CallOutputAddresses = Offset2Addr.equal_range(Offset); + auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); auto CallOutputAddress = CallOutputAddresses.first; if (CallOutputAddress == CallOutputAddresses.second) { Probe->setAddress(INT64_MAX); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Rewrite/RewriteInstance.h" +#include "bolt/Core/AddressMap.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryEmitter.h" #include "bolt/Core/BinaryFunction.h" @@ -3170,6 +3171,9 @@ ROFlags); BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold", ELF::SHT_PROGBITS, ROFlags); + BC->registerOrUpdateSection(AddressMap::SectionName, ELF::SHT_PROGBITS, + ROFlags) + .setLinkOnly(); } void RewriteInstance::emitAndLink() { @@ -3575,6 +3579,9 @@ } for (BinarySection &Section : BC->allocatableSections()) { + if (Section.isLinkOnly()) + continue; + if (!Section.hasValidSectionID()) continue; @@ -3637,6 +3644,12 @@ } void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { + if (auto MapSection = BC->getUniqueSectionByName(AddressMap::SectionName)) { + auto Map = AddressMap::parse(MapSection->getOutputContents(), *BC); + BC->setIOAddressMap(std::move(Map)); + BC->deregisterSection(*MapSection); + } + for (BinaryFunction *Function : BC->getAllBinaryFunctions()) Function->updateOutputValues(Layout); } @@ -5282,6 +5295,8 @@ for (BinarySection &Section : BC->allocatableSections()) { if (!Section.isFinalized() || !Section.getOutputData()) continue; + if (Section.isLinkOnly()) + continue; if (opts::Verbosity >= 1) outs() << "BOLT: writing new section " << Section.getName()