diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -32,6 +32,7 @@ namespace bolt { class BinaryFunction; +class BOLTLinker; class JumpTable; class BinaryBasicBlock { @@ -829,7 +830,7 @@ } /// Update addresses of special instructions inside this basic block. - void updateOutputValues(const MCAsmLayout &Layout); + void updateOutputValues(const BOLTLinker &Linker); /// Return mapping of input offsets to symbols in the output. LocSymsTy &getLocSyms() { diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1220,7 +1220,7 @@ } /// Update output values of the function based on the final \p Layout. - void updateOutputValues(const MCAsmLayout &Layout); + void updateOutputValues(const BOLTLinker &Linker); /// Return mapping of input to output addresses. Most users should call /// translateInputToOutputAddress() for address translation. diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -14,7 +14,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DWP/DWP.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/ToolOutputFile.h" #include @@ -31,6 +30,7 @@ namespace bolt { class BinaryContext; +class BOLTLinker; class DWARFRewriter { public: @@ -190,7 +190,7 @@ void updateDebugInfo(); /// Update stmt_list for CUs based on the new .debug_line \p Layout. - void updateLineTableOffsets(const MCAsmLayout &Layout); + void updateLineTableOffsets(const BOLTLinker &Linker); uint64_t getDwoRangesBase(uint64_t DWOId) { return DwoRangesBase[DWOId]; } diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -187,7 +187,7 @@ void mapAllocatableSections(BOLTLinker::SectionMapper MapSection); /// Update output object's values based on the final \p Layout. - void updateOutputValues(const MCAsmLayout &Layout); + void updateOutputValues(const BOLTLinker &Linker); /// Rewrite back all functions (hopefully optimized) that fit in the original /// memory footprint for that function. If the function is now larger and does diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -14,7 +14,6 @@ #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Errc.h" @@ -613,16 +612,16 @@ return NewBlock; } -void BinaryBasicBlock::updateOutputValues(const MCAsmLayout &Layout) { +void BinaryBasicBlock::updateOutputValues(const BOLTLinker &Linker) { if (!LocSyms) return; const uint64_t BBAddress = getOutputAddressRange().first; - const uint64_t BBOffset = Layout.getSymbolOffset(*getLabel()); for (const auto &LocSymKV : *LocSyms) { const uint32_t InputFunctionOffset = LocSymKV.first; - const uint32_t OutputOffset = static_cast( - Layout.getSymbolOffset(*LocSymKV.second) - BBOffset); + const auto OutputAddress = Linker.lookupSymbol(LocSymKV.second->getName()); + assert(OutputAddress && "Cannot find LocSym symbol"); + const auto OutputOffset = *OutputAddress - BBAddress; getOffsetTranslationTable().emplace_back( std::make_pair(OutputOffset, InputFunctionOffset)); diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -183,6 +183,14 @@ std::unique_ptr Ctx( new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); + + // This ensures that symbols created with MCContext::createNamedTempSymbol are + // not marked as temporary and end-up in the object file's symbol table. We + // need this because we rely on the linker to report symbol values to update + // our output values. If these symbols are not emitted to the symbol table, + // the linker will never see them. + Ctx->setAllowTemporaryLabels(false); + std::unique_ptr MOFI( TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); Ctx->setObjectFileInfo(MOFI.get()); diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -489,7 +489,7 @@ if (!EmitCodeOnly && BF.requiresAddressTranslation() && BC.MIB->getOffset(Instr)) { const uint32_t Offset = *BC.MIB->getOffset(Instr); - MCSymbol *LocSym = BC.Ctx->createTempSymbol(); + MCSymbol *LocSym = BC.Ctx->createNamedTempSymbol(); Streamer.emitLabel(LocSym); BB->getLocSyms().emplace_back(Offset, LocSym); } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -25,7 +25,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -4012,7 +4011,7 @@ } } -void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) { +void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) { if (!isEmitted()) { assert(!isInjected() && "injected function should be emitted"); setOutputAddress(getAddress()); @@ -4020,16 +4019,19 @@ return; } - const uint64_t BaseAddress = getCodeSection()->getOutputAddress(); + const auto StartAddress = Linker.lookupSymbol(getSymbol()->getName()); + assert(StartAddress && "Cannot find function entry symbol"); + const auto EndAddress = Linker.lookupSymbol(getFunctionEndLabel()->getName()); + assert(EndAddress && "Cannot find function end symbol"); + setOutputAddress(*StartAddress); + setOutputSize(*EndAddress - *StartAddress); + if (BC.HasRelocations || isInjected()) { - const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol()); - const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel()); - setOutputAddress(BaseAddress + StartOffset); - setOutputSize(EndOffset - StartOffset); if (hasConstantIsland()) { - const uint64_t DataOffset = - Layout.getSymbolOffset(*getFunctionConstantIslandLabel()); - setOutputDataAddress(BaseAddress + DataOffset); + const auto DataAddress = + Linker.lookupSymbol(getFunctionConstantIslandLabel()->getName()); + assert(DataAddress && "Cannot find function CI symbol"); + setOutputDataAddress(*DataAddress); for (auto It : Islands->Offsets) { const uint64_t OldOffset = It.first; BinaryData *BD = BC.getBinaryDataAtAddress(getAddress() + OldOffset); @@ -4037,8 +4039,11 @@ continue; MCSymbol *Symbol = It.second; - const uint64_t NewOffset = Layout.getSymbolOffset(*Symbol); - BD->setOutputLocation(*getCodeSection(), NewOffset); + const auto NewAddress = Linker.lookupSymbol(Symbol->getName()); + assert(NewAddress && "Cannot find CI symbol"); + auto &Section = *getCodeSection(); + const auto NewOffset = *NewAddress - Section.getOutputAddress(); + BD->setOutputLocation(Section, NewOffset); } } if (isSplit()) { @@ -4048,7 +4053,6 @@ // If fragment is empty, cold section might not exist if (FF.empty() && ColdSection.getError()) continue; - const uint64_t ColdBaseAddress = ColdSection->getOutputAddress(); const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum()); // If fragment is empty, symbol might have not been emitted @@ -4061,21 +4065,22 @@ getFunctionEndLabel(FF.getFragmentNum()); assert(ColdEndSymbol && ColdEndSymbol->isDefined() && "split function should have defined cold end symbol"); - const uint64_t ColdStartOffset = - Layout.getSymbolOffset(*ColdStartSymbol); - const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol); - FF.setAddress(ColdBaseAddress + ColdStartOffset); - FF.setImageSize(ColdEndOffset - ColdStartOffset); + const auto ColdStartAddress = + Linker.lookupSymbol(ColdStartSymbol->getName()); + assert(ColdStartAddress && "Cannot find cold start symbol"); + const auto ColdEndAddress = + Linker.lookupSymbol(ColdEndSymbol->getName()); + assert(ColdEndAddress && "Cannot find cold start symbol"); + FF.setAddress(*ColdStartAddress); + FF.setImageSize(*ColdEndAddress - *ColdStartAddress); if (hasConstantIsland()) { - const uint64_t DataOffset = - Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel()); - setOutputColdDataAddress(ColdBaseAddress + DataOffset); + const auto DataAddress = Linker.lookupSymbol( + getFunctionColdConstantIslandLabel()->getName()); + assert(DataAddress && "Cannot find cold CI symbol"); + setOutputColdDataAddress(*DataAddress); } } } - } else { - setOutputAddress(getAddress()); - setOutputSize(Layout.getSymbolOffset(*getFunctionEndLabel())); } // Update basic block output ranges for the debug info, if we have @@ -4110,15 +4115,15 @@ assert(FragmentBaseAddress == getOutputAddress()); } - const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel()); - const uint64_t BBAddress = FragmentBaseAddress + BBOffset; - BB->setOutputStartAddress(BBAddress); + const auto BBAddress = Linker.lookupSymbol(BB->getLabel()->getName()); + assert(BBAddress && "Cannot find BB symbol"); + BB->setOutputStartAddress(*BBAddress); if (PrevBB) - PrevBB->setOutputEndAddress(BBAddress); + PrevBB->setOutputEndAddress(*BBAddress); PrevBB = BB; - BB->updateOutputValues(Layout); + BB->updateOutputValues(Linker); } PrevBB->setOutputEndAddress(PrevBB->isSplit() diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -215,7 +215,12 @@ // If a local symbol is escaped, then the function (potentially) has // multiple entry points and we exclude such functions from // comparison. - if (SymbolA->isTemporary() && SymbolB->isTemporary()) + auto IsLocal = [&BC](const MCSymbol &Sym) { + return Sym.getName().starts_with( + BC.AsmInfo->getPrivateGlobalPrefix()); + }; + + if (IsLocal(*SymbolA) && IsLocal(*SymbolB)) return true; // Compare symbols as functions. diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -30,7 +30,6 @@ #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ObjectFile.h" @@ -1232,7 +1231,7 @@ } } -void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) { +void DWARFRewriter::updateLineTableOffsets(const BOLTLinker &Linker) { ErrorOr DbgInfoSection = BC.getUniqueSectionByName(".debug_info"); ErrorOr TypeInfoSection = @@ -1266,10 +1265,11 @@ if (!AttrVal) continue; - const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label); - DebugLineOffsetMap[GetStatementListValue(CU.get())] = LineTableOffset; + const auto LineTableOffset = Linker.lookupSymbol(Label->getName()); + assert(LineTableOffset && "Cannot find line table symbol"); + DebugLineOffsetMap[GetStatementListValue(CU.get())] = *LineTableOffset; assert(DbgInfoSection && ".debug_info section must exist"); - LineTablePatchMap[CU.get()] = LineTableOffset; + LineTablePatchMap[CU.get()] = *LineTableOffset; } for (const std::unique_ptr &TU : BC.DwCtx->types_section_units()) { diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -20,7 +20,6 @@ #include "bolt/Rewrite/JITLinkLinker.h" #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" #include "bolt/Utils/Utils.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" @@ -476,9 +475,6 @@ "error creating in-memory object"); assert(Obj && "createObjectFile cannot return nullptr"); - MCAsmLayout FinalLayout( - static_cast(Streamer.get())->getAssembler()); - auto EFMM = std::make_unique(*BC); EFMM->setNewSecPrefix(getNewSecPrefix()); EFMM->setOrgSecPrefix(getOrgSecPrefix()); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -38,7 +38,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCStreamer.h" @@ -3228,15 +3227,12 @@ Linker->loadObject(ObjectMemBuffer->getMemBufferRef(), [this](auto MapSection) { mapFileSections(MapSection); }); - MCAsmLayout FinalLayout( - static_cast(Streamer.get())->getAssembler()); - // Update output addresses based on the new section map and // layout. Only do this for the object created by ourselves. - updateOutputValues(FinalLayout); + updateOutputValues(*Linker); if (opts::UpdateDebugSections) - DebugInfoRewriter->updateLineTableOffsets(FinalLayout); + DebugInfoRewriter->updateLineTableOffsets(*Linker); if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) { @@ -3628,9 +3624,9 @@ } } -void RewriteInstance::updateOutputValues(const MCAsmLayout &Layout) { +void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) { for (BinaryFunction *Function : BC->getAllBinaryFunctions()) - Function->updateOutputValues(Layout); + Function->updateOutputValues(Linker); } void RewriteInstance::patchELFPHDRTable() { diff --git a/bolt/test/X86/MachO/emit_new_binary_with_external_symbol.test b/bolt/test/X86/MachO/emit_new_binary_with_external_symbol.test --- a/bolt/test/X86/MachO/emit_new_binary_with_external_symbol.test +++ b/bolt/test/X86/MachO/emit_new_binary_with_external_symbol.test @@ -10,19 +10,15 @@ # ORIGINAL-NEXT: 100000f5f: 89 7d f8 movl %edi, -8(%rbp) # ORIGINAL-NEXT: 100000f62: 48 89 75 f0 movq %rsi, -16(%rbp) # ORIGINAL-NEXT: 100000f66: 83 7d f8 01 cmpl $1, -8(%rbp) -# ORIGINAL-NEXT: 100000f6a: 7e 0a jle 0x100000f76 -# ORIGINAL-NEXT: 100000f6c: e8 25 00 00 00 callq 0x100000f96 -# ORIGINAL-NEXT: 100000f71: 89 45 fc movl %eax, -4(%rbp) -# ORIGINAL-NEXT: 100000f74: eb 10 jmp 0x100000f86 -# ORIGINAL-NEXT: 100000f76: c7 45 ec 01 00 00 00 movl $1, -20(%rbp) -# ORIGINAL-NEXT: 100000f7d: 8b 45 ec movl -20(%rbp), %eax -# ORIGINAL-NEXT: 100000f80: 83 c0 02 addl $2, %eax -# ORIGINAL-NEXT: 100000f83: 89 45 fc movl %eax, -4(%rbp) -# ORIGINAL-NEXT: 100000f86: 8b 45 fc movl -4(%rbp), %eax -# ORIGINAL-NEXT: 100000f89: 48 83 c4 20 addq $32, %rsp -# ORIGINAL-NEXT: 100000f8d: 5d popq %rbp -# ORIGINAL-NEXT: 100000f8e: c3 retq -# ORIGINAL-NEXT: 100000f8f: fc cld +# ORIGINAL-NEXT: 100000f6a: 0f 8e 0d 00 00 00 jle 0x100000f7d +# ORIGINAL-NEXT: 100000f70: e8 21 00 00 00 callq 0x100000f96 ## symbol stub for: _f +# ORIGINAL-NEXT: 100000f75: 89 45 fc movl %eax, -4(%rbp) +# ORIGINAL-NEXT: 100000f78: e9 10 00 00 00 jmp 0x100000f8d +# ORIGINAL-NEXT: 100000f7d: c7 45 ec 01 00 00 00 movl $1, -20(%rbp) +# ORIGINAL-NEXT: 100000f84: 8b 45 ec movl -20(%rbp), %eax +# ORIGINAL-NEXT: 100000f87: 83 c0 02 addl $2, %eax +# ORIGINAL-NEXT: 100000f8a: 89 45 fc movl %eax, -4(%rbp) +# ORIGINAL-NEXT: 100000f8d: 8b 45 fc movl -4(%rbp), %eax # ORIGINAL-NEXT: 100000f90: 48 83 c4 20 addq $32, %rsp # ORIGINAL-NEXT: 100000f94: 5d popq %rbp # ORIGINAL-NEXT: 100000f95: c3 retq @@ -38,18 +34,15 @@ # REVERSED-NEXT: 100000f5f: 89 7d f8 movl %edi, -8(%rbp) # REVERSED-NEXT: 100000f62: 48 89 75 f0 movq %rsi, -16(%rbp) # REVERSED-NEXT: 100000f66: 83 7d f8 01 cmpl $1, -8(%rbp) -# REVERSED-NEXT: 100000f6a: 7e 0b jle 0x100000f77 -# REVERSED-NEXT: 100000f6c: eb 1b jmp 0x100000f89 -# REVERSED-NEXT: 100000f6e: 8b 45 fc movl -4(%rbp), %eax -# REVERSED-NEXT: 100000f71: 48 83 c4 20 addq $32, %rsp -# REVERSED-NEXT: 100000f75: 5d popq %rbp -# REVERSED-NEXT: 100000f76: c3 retq -# REVERSED-NEXT: 100000f77: c7 45 ec 01 00 00 00 movl $1, -20(%rbp) -# REVERSED-NEXT: 100000f7e: 8b 45 ec movl -20(%rbp), %eax -# REVERSED-NEXT: 100000f81: 83 c0 02 addl $2, %eax -# REVERSED-NEXT: 100000f84: 89 45 fc movl %eax, -4(%rbp) -# REVERSED-NEXT: 100000f87: eb e5 jmp 0x100000f6e -# REVERSED-NEXT: 100000f89: e8 08 00 00 00 callq 0x100000f96 -# REVERSED-NEXT: 100000f8e: 89 45 fc movl %eax, -4(%rbp) -# REVERSED-NEXT: 100000f91: eb db jmp 0x100000f6e -# REVERSED-NEXT: 100000f93: 20 5d c3 andb %bl, -61(%rbp) +# REVERSED-NEXT: 100000f6a: 0f 8e 0d 00 00 00 jle 0x100000f7d +# REVERSED-NEXT: 100000f70: e8 21 00 00 00 callq 0x100000f96 ## symbol stub for: _f +# REVERSED-NEXT: 100000f75: 89 45 fc movl %eax, -4(%rbp) +# REVERSED-NEXT: 100000f78: e9 10 00 00 00 jmp 0x100000f8d +# REVERSED-NEXT: 100000f7d: c7 45 ec 01 00 00 00 movl $1, -20(%rbp) +# REVERSED-NEXT: 100000f84: 8b 45 ec movl -20(%rbp), %eax +# REVERSED-NEXT: 100000f87: 83 c0 02 addl $2, %eax +# REVERSED-NEXT: 100000f8a: 89 45 fc movl %eax, -4(%rbp) +# REVERSED-NEXT: 100000f8d: 8b 45 fc movl -4(%rbp), %eax +# REVERSED-NEXT: 100000f90: 48 83 c4 20 addq $32, %rsp +# REVERSED-NEXT: 100000f94: 5d popq %rbp +# REVERSED-NEXT: 100000f95: c3 retq