diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1420,6 +1420,11 @@ llvm_unreachable("not implemented"); } + virtual bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const { + llvm_unreachable("not implemented"); + return false; + } + virtual int getShortJmpEncodingSize() const { llvm_unreachable("not implemented"); } diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -58,8 +58,10 @@ /// Skip relocations that we don't want to handle in BOLT static bool skipRelocationType(uint64_t Type); - /// Handle special cases when relocation should not be processed by BOLT - static bool skipRelocationProcess(uint64_t Type, uint64_t Contents); + /// Handle special cases when relocation should not be processed by BOLT or + /// change relocation \p Type to proper one before continuing if \p Contents + /// and \P Type mismatch occured. + static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents); // Adjust value depending on relocation type (make it PC relative or not) static uint64_t adjustValue(uint64_t Type, uint64_t Value, diff --git a/bolt/include/bolt/Passes/FixRelaxationPass.h b/bolt/include/bolt/Passes/FixRelaxationPass.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Passes/FixRelaxationPass.h @@ -0,0 +1,40 @@ +//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the FixRelaxations class, which locates instructions with +// wrong targets and fixes them. Such problems usually occures when linker +// relaxes (changes) instructions, but doesn't fix relocations types properly +// for them. +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_FIXRELAXATIONPASS_H +#define BOLT_PASSES_FIXRELAXATIONPASS_H + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class FixRelaxations : public BinaryFunctionPass { + void runOnFunction(BinaryFunction &Function); + +public: + explicit FixRelaxations(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + const char *getName() const override { return "fix-relaxations"; } + + /// Pass entry point + void runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -247,7 +247,7 @@ /// The \p SymbolName, \p SymbolAddress, \p Addend and \p ExtractedValue /// parameters will be set on success. The \p Skip argument indicates /// that the relocation was analyzed, but it must not be processed. - bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t RType, + bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t &RType, std::string &SymbolName, bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, uint64_t &ExtractedValue, bool &Skip) const; diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -171,11 +171,11 @@ return Type == ELF::R_AARCH64_NONE || Type == ELF::R_AARCH64_LD_PREL_LO19; } -bool skipRelocationProcessX86(uint64_t Type, uint64_t Contents) { +bool skipRelocationProcessX86(uint64_t &Type, uint64_t Contents) { return false; } -bool skipRelocationProcessAArch64(uint64_t Type, uint64_t Contents) { +bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) { auto IsMov = [](uint64_t Contents) -> bool { // The bits 28-23 are 0b100101 return (Contents & 0x1f800000) == 0x12800000; @@ -191,12 +191,25 @@ return (Contents & 0x9f000000) == 0x10000000; }; + auto IsAddImm = [](uint64_t Contents) -> bool { + // The bits 30-23 are 0b00100010 + return (Contents & 0x7F800000) == 0x11000000; + }; + auto IsNop = [](uint64_t Contents) -> bool { return Contents == 0xd503201f; }; // The linker might eliminate the instruction and replace it with NOP, ignore if (IsNop(Contents)) return true; + // The linker might relax ADRP+LDR instruction sequence for loading symbol + // address from GOT table to ADRP+ADD sequence that would point to the + // binary-local symbol. Change relocation type in order to process it right. + if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) { + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + return false; + } + // The linker might perform TLS relocations relaxations, such as // changed TLS access model (e.g. changed global dynamic model // to initial exec), thus changing the instructions. The static @@ -548,7 +561,7 @@ return skipRelocationTypeX86(Type); } -bool Relocation::skipRelocationProcess(uint64_t Type, uint64_t Contents) { +bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) { if (Arch == Triple::aarch64) return skipRelocationProcessAArch64(Type, Contents); return skipRelocationProcessX86(Type, Contents); diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -13,6 +13,7 @@ DataflowInfoManager.cpp FrameAnalysis.cpp FrameOptimizer.cpp + FixRelaxationPass.cpp HFSort.cpp HFSortPlus.cpp Hugify.cpp diff --git a/bolt/lib/Passes/FixRelaxationPass.cpp b/bolt/lib/Passes/FixRelaxationPass.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Passes/FixRelaxationPass.cpp @@ -0,0 +1,63 @@ +#include "bolt/Passes/FixRelaxationPass.h" +#include "bolt/Core/ParallelUtilities.h" + +using namespace llvm; + +namespace llvm { +namespace bolt { + +// This function finds ADRP+ADD instruction sequences that originally before +// linker relaxations were ADRP+LDR. We've modified LDR/ADD relocation properly +// during relocation reading, so its targeting right symbol. As for ADRP its +// target is wrong before this pass since we won't be able to recognize and +// properly change R_AARCH64_ADR_GOT_PAGE relocation to +// R_AARCH64_ADR_PREL_PG_HI21 during relocation reading. Now we're searching for +// ADRP+ADD sequences, checking that ADRP points to the GOT-table symbol and the +// target of ADD is another symbol. When found change ADRP symbol reference to +// the ADDs one. +void FixRelaxations::runOnFunction(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + for (BinaryBasicBlock &BB : BF) { + for (auto II = BB.begin(); II != BB.end(); ++II) { + MCInst &Adrp = *II; + if (BC.MIB->isPseudo(Adrp) || !BC.MIB->isADRP(Adrp)) + continue; + + const MCSymbol *AdrpSymbol = BC.MIB->getTargetSymbol(Adrp); + if (!AdrpSymbol || AdrpSymbol->getName() != "__BOLT_got_zero") + continue; + + auto NextII = std::next(II); + if (NextII == BB.end()) + continue; + + const MCInst &Add = *NextII; + if (!BC.MIB->matchAdrpAddPair(Adrp, Add)) + continue; + + const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Add); + if (!Symbol || AdrpSymbol == Symbol) + continue; + + const int64_t Addend = BC.MIB->getTargetAddend(Add); + BC.MIB->setOperandToSymbolRef(Adrp, /*OpNum*/ 1, Symbol, Addend, + BC.Ctx.get(), ELF::R_AARCH64_NONE); + } + } +} + +void FixRelaxations::runOnFunctions(BinaryContext &BC) { + if (!BC.isAArch64() || !BC.HasRelocations) + return; + + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + runOnFunction(BF); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, nullptr, + "FixRelaxations"); +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -12,6 +12,7 @@ #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" +#include "bolt/Passes/FixRelaxationPass.h" #include "bolt/Passes/FrameOptimizer.h" #include "bolt/Passes/Hugify.h" #include "bolt/Passes/IdenticalCodeFolding.h" @@ -179,6 +180,11 @@ PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), cl::cat(BoltOptCategory)); +static cl::opt + PrintFixRelaxations("print-fix-relaxations", + cl::desc("print functions after fix relaxations pass"), + cl::cat(BoltOptCategory)); + static cl::opt PrintVeneerElimination( "print-veneer-elimination", cl::desc("print functions after veneer elimination pass"), @@ -315,9 +321,12 @@ Manager.registerPass(std::make_unique(), opts::AsmDump.getNumOccurrences()); - if (BC.isAArch64()) + if (BC.isAArch64()) { + Manager.registerPass(std::make_unique(PrintFixRelaxations)); + Manager.registerPass( std::make_unique(PrintVeneerElimination)); + } // Here we manage dependencies/order manually, since passes are run in the // order they're registered. diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1882,7 +1882,7 @@ } // anonymous namespace bool RewriteInstance::analyzeRelocation( - const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, + const RelocationRef &Rel, uint64_t &RType, std::string &SymbolName, bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, uint64_t &ExtractedValue, bool &Skip) const { Skip = false; @@ -2554,7 +2554,8 @@ } if (ForceRelocation) { - std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; + std::string Name = + Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName; ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); SymbolAddress = 0; if (Relocation::isGOT(RType)) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -63,6 +63,10 @@ return Inst.getOpcode() == AArch64::ADR; } + bool isAddXri(const MCInst &Inst) const { + return Inst.getOpcode() == AArch64::ADDXri; + } + void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override { assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); assert(MCPlus::getNumPrimeOperands(Inst) != 0 && @@ -365,12 +369,11 @@ // Auto-select correct operand number if (OpNum == 0) { - if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst)) + if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst) || + isMOVW(Inst)) OpNum = 1; - if (isTB(Inst)) + if (isTB(Inst) || isAddXri(Inst)) OpNum = 2; - if (isMOVW(Inst)) - OpNum = 1; } return true; @@ -1072,6 +1075,19 @@ return 3; } + bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const override { + if (!isADRP(Adrp) || !isAddXri(Add)) + return false; + + assert(Adrp.getOperand(0).isReg() && + "Unexpected operand in ADRP instruction"); + MCPhysReg AdrpReg = Adrp.getOperand(0).getReg(); + assert(Add.getOperand(1).isReg() && + "Unexpected operand in ADDXri instruction"); + MCPhysReg AddReg = Add.getOperand(1).getReg(); + return AdrpReg == AddReg; + } + bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, int64_t Addend, MCContext *Ctx, int64_t &Value, uint64_t RelType) const override { diff --git a/bolt/test/AArch64/Inputs/got-ld64-relaxation.yaml b/bolt/test/AArch64/Inputs/got-ld64-relaxation.yaml new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/Inputs/got-ld64-relaxation.yaml @@ -0,0 +1,202 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 + Entry: 0x210648 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + VAddr: 0x200040 + Align: 0x8 + - Type: PT_INTERP + Flags: [ PF_R ] + FirstSec: .interp + LastSec: .interp + VAddr: 0x2002A8 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .interp + LastSec: .rodata + VAddr: 0x200000 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .text + LastSec: .plt + VAddr: 0x210648 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .text + LastSec: .got + VAddr: 0x322580 + Align: 0x10000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .data + LastSec: .bss + VAddr: 0x332720 + Align: 0x10000 + - Type: PT_DYNAMIC + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x322590 + Align: 0x8 +Sections: + - Name: .interp + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x2002A8 + AddressAlign: 0x1 + Content: 2F6C69622F6C642D6C696E75782D616172636836342E736F2E3100 + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Address: 0x200300 + Link: .dynstr + AddressAlign: 0x8 + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Address: 0x2003F4 + AddressAlign: 0x1 + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] + Address: 0x2004E8 + AddressAlign: 0x8 + Content: 010002000000000000000000000000000000000000000000486572650A00 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x210648 + AddressAlign: 0x8 + Content: 1D0080D21E0080D2E50300AAE10340F9E2230091E60300910000009000001A91030800D063E01191040800D084E01391B6070494B10704942F000014800800D0008843F9400000B4B4070414C0035FD6000900D000C01C91010900D021C01C913F0000EBC000005481FFFF90217842F9610000B4F00301AA00021FD6C0035FD6000900D000C01C91010900D021C01C91210000CB22FC7FD3410C818BFF0781EB21FC4193C000005482FFFF90427C42F9620000B4F00302AA00021FD6C0035FD6FD7BBEA9FD030091F30B00F9130900D060A25D3980000035DEFFFF972000805260A21D39F30B40F9FD7BC2A8C0035FD6E4FFFF17FF4300D1000800D000601191E00700F90004009100008052FF430091C0035FD6FD7BBFA9FD03009160F7FFD00000149142000094FD7BC1A8C0035FD600000000FD7BBCA9FD030091F35301A91F2003D554080810F55B02A91F2003D5B5070810940215CBF603002AF76303A9F70301AAF80302AA14000094FF0F94EB6001005494FE4393130080D2A37A73F8E20318AA73060091E10317AAE003162A60003FD69F0213EB21FFFF54F35341A9F55B42A9F76343A9FD7BC4A8C0035FD61F2003D5C0035FD6 + - Name: .plt + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x312520 + AddressAlign: 0x10 + Content: F07BBFA91001009011A243F910021D9120021FD61F2003D51F2003D51F2003D51001009011A643F910221D9120021FD61001009011AA43F910421D9120021FD61001009011AE43F910621D9120021FD61001009011B243F910821D9120021FD6 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x322590 + Link: .dynstr + AddressAlign: 0x8 + Entries: + - Tag: DT_NEEDED + Value: 0x65 + - Tag: DT_SYMTAB + Value: 0x200300 + - Tag: DT_SYMENT + Value: 0x18 + - Tag: DT_STRTAB + Value: 0x2003F4 + - Tag: DT_STRSZ + Value: 0x7A + - Tag: DT_NULL + Value: 0x0 + - Name: .got + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x322710 + AddressAlign: 0x8 + Content: '00000000000000005824310000000000' + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x332720 + AddressAlign: 0x8 + Content: '00000000000000000000000000000000' + - Name: .tm_clone_table + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x332730 + AddressAlign: 0x8 + - Name: .got.plt + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x332730 + AddressAlign: 0x8 + Content: '0000000000000000000000000000000000000000000000002025310000000000202531000000000020253100000000002025310000000000' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x332768 + AddressAlign: 0x1 + Size: 0x1 + - Name: .rela.text + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .text + Relocations: + - Offset: 0x210740 + Symbol: foo + Type: R_AARCH64_ADR_GOT_PAGE + - Offset: 0x210744 + Symbol: foo + Type: R_AARCH64_LD64_GOT_LO12_NC +Symbols: + - Name: '$x' + Section: .text + Value: 0x210648 + - Name: .text + Type: STT_SECTION + Section: .text + Value: 0x210648 + - Name: .data + Type: STT_SECTION + Section: .data + Value: 0x332720 + - Name: .bss + Type: STT_SECTION + Section: .bss + Value: 0x332768 + - Name: .rodata + Type: STT_SECTION + Section: .rodata + Value: 0x2004E8 + - Name: .interp + Type: STT_SECTION + Section: .interp + Value: 0x2002A8 + - Name: _DYNAMIC + Section: .dynamic + Value: 0x322590 + Other: [ STV_HIDDEN ] + - Name: _start + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x210648 + - Name: main + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x21073C + Size: 0x20 + - Name: data_start + Section: .data + Binding: STB_WEAK + Value: 0x332720 + - Name: _IO_stdin_used + Type: STT_OBJECT + Section: .rodata + Binding: STB_GLOBAL + Value: 0x2004E8 + Size: 0x4 + - Name: __libc_start_main + Type: STT_FUNC + Binding: STB_GLOBAL + - Name: foo + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x21075c + Size: 0xF diff --git a/bolt/test/AArch64/got-ld64-relaxation.test b/bolt/test/AArch64/got-ld64-relaxation.test new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/got-ld64-relaxation.test @@ -0,0 +1,9 @@ +// This test checks that ADR+LDR instruction sequence relaxed by the linker +// to the ADR+ADD sequence is properly reconized and handled by bolt + +// RUN: yaml2obj %p/Inputs/got-ld64-relaxation.yaml &> %t.exe +// RUN: llvm-bolt %t.exe -o /dev/null --print-fix-relaxations \ +// RUN: --print-only=main | FileCheck %s + +// CHECK: adrp x0, foo +// CHECK-NEXT: add x0, x0, :lo12:foo