diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1420,6 +1420,11 @@ llvm_unreachable("not implemented"); } + virtual bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const { + llvm_unreachable("not implemented"); + return false; + } + virtual int getShortJmpEncodingSize() const { llvm_unreachable("not implemented"); } diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -58,8 +58,10 @@ /// Skip relocations that we don't want to handle in BOLT static bool skipRelocationType(uint64_t Type); - /// Handle special cases when relocation should not be processed by BOLT - static bool skipRelocationProcess(uint64_t Type, uint64_t Contents); + /// Handle special cases when relocation should not be processed by BOLT or + /// change relocation \p Type to proper one before continuing if \p Contents + /// and \P Type mismatch occured. + static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents); // Adjust value depending on relocation type (make it PC relative or not) static uint64_t adjustValue(uint64_t Type, uint64_t Value, diff --git a/bolt/include/bolt/Passes/FixRelaxationPass.h b/bolt/include/bolt/Passes/FixRelaxationPass.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Passes/FixRelaxationPass.h @@ -0,0 +1,40 @@ +//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the FixRelaxations class, which locates instructions with +// wrong targets and fixes them. Such problems usually occures when linker +// relaxes (changes) instructions, but doesn't fix relocations types properly +// for them. +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_FIXRELAXATIONPASS_H +#define BOLT_PASSES_FIXRELAXATIONPASS_H + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class FixRelaxations : public BinaryFunctionPass { + void runOnFunction(BinaryFunction &Function); + +public: + explicit FixRelaxations(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + const char *getName() const override { return "fix-relaxations"; } + + /// Pass entry point + void runOnFunctions(BinaryContext &BC) override; +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -247,7 +247,7 @@ /// The \p SymbolName, \p SymbolAddress, \p Addend and \p ExtractedValue /// parameters will be set on success. The \p Skip argument indicates /// that the relocation was analyzed, but it must not be processed. - bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t RType, + bool analyzeRelocation(const object::RelocationRef &Rel, uint64_t &RType, std::string &SymbolName, bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, uint64_t &ExtractedValue, bool &Skip) const; diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -171,11 +171,11 @@ return Type == ELF::R_AARCH64_NONE || Type == ELF::R_AARCH64_LD_PREL_LO19; } -bool skipRelocationProcessX86(uint64_t Type, uint64_t Contents) { +bool skipRelocationProcessX86(uint64_t &Type, uint64_t Contents) { return false; } -bool skipRelocationProcessAArch64(uint64_t Type, uint64_t Contents) { +bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) { auto IsMov = [](uint64_t Contents) -> bool { // The bits 28-23 are 0b100101 return (Contents & 0x1f800000) == 0x12800000; @@ -191,12 +191,25 @@ return (Contents & 0x9f000000) == 0x10000000; }; + auto IsAddImm = [](uint64_t Contents) -> bool { + // The bits 30-23 are 0b00100010 + return (Contents & 0x7F800000) == 0x11000000; + }; + auto IsNop = [](uint64_t Contents) -> bool { return Contents == 0xd503201f; }; // The linker might eliminate the instruction and replace it with NOP, ignore if (IsNop(Contents)) return true; + // The linker might relax ADRP+LDR instruction sequence for loading symbol + // address from GOT table to ADRP+ADD sequence that would point to the + // binary-local symbol. Change relocation type in order to process it right. + if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) { + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + return false; + } + // The linker might perform TLS relocations relaxations, such as // changed TLS access model (e.g. changed global dynamic model // to initial exec), thus changing the instructions. The static @@ -548,7 +561,7 @@ return skipRelocationTypeX86(Type); } -bool Relocation::skipRelocationProcess(uint64_t Type, uint64_t Contents) { +bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) { if (Arch == Triple::aarch64) return skipRelocationProcessAArch64(Type, Contents); return skipRelocationProcessX86(Type, Contents); diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -13,6 +13,7 @@ DataflowInfoManager.cpp FrameAnalysis.cpp FrameOptimizer.cpp + FixRelaxationPass.cpp HFSort.cpp HFSortPlus.cpp Hugify.cpp diff --git a/bolt/lib/Passes/FixRelaxationPass.cpp b/bolt/lib/Passes/FixRelaxationPass.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Passes/FixRelaxationPass.cpp @@ -0,0 +1,63 @@ +#include "bolt/Passes/FixRelaxationPass.h" +#include "bolt/Core/ParallelUtilities.h" + +using namespace llvm; + +namespace llvm { +namespace bolt { + +// This function finds ADRP+ADD instruction sequences that originally before +// linker relaxations were ADRP+LDR. We've modified LDR/ADD relocation properly +// during relocation reading, so its targeting right symbol. As for ADRP its +// target is wrong before this pass since we won't be able to recognize and +// properly change R_AARCH64_ADR_GOT_PAGE relocation to +// R_AARCH64_ADR_PREL_PG_HI21 during relocation reading. Now we're searching for +// ADRP+ADD sequences, checking that ADRP points to the GOT-table symbol and the +// target of ADD is another symbol. When found change ADRP symbol reference to +// the ADDs one. +void FixRelaxations::runOnFunction(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + for (BinaryBasicBlock &BB : BF) { + for (auto II = BB.begin(); II != BB.end(); ++II) { + MCInst &Adrp = *II; + if (BC.MIB->isPseudo(Adrp) || !BC.MIB->isADRP(Adrp)) + continue; + + const MCSymbol *AdrpSymbol = BC.MIB->getTargetSymbol(Adrp); + if (!AdrpSymbol || AdrpSymbol->getName() != "__BOLT_got_zero") + continue; + + auto NextII = std::next(II); + if (NextII == BB.end()) + continue; + + const MCInst &Add = *NextII; + if (!BC.MIB->matchAdrpAddPair(Adrp, Add)) + continue; + + const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Add); + if (!Symbol || AdrpSymbol == Symbol) + continue; + + const int64_t Addend = BC.MIB->getTargetAddend(Add); + BC.MIB->setOperandToSymbolRef(Adrp, /*OpNum*/ 1, Symbol, Addend, + BC.Ctx.get(), ELF::R_AARCH64_NONE); + } + } +} + +void FixRelaxations::runOnFunctions(BinaryContext &BC) { + if (!BC.isAArch64() || !BC.HasRelocations) + return; + + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + runOnFunction(BF); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, nullptr, + "FixRelaxations"); +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -12,6 +12,7 @@ #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" +#include "bolt/Passes/FixRelaxationPass.h" #include "bolt/Passes/FrameOptimizer.h" #include "bolt/Passes/Hugify.h" #include "bolt/Passes/IdenticalCodeFolding.h" @@ -179,6 +180,11 @@ PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), cl::cat(BoltOptCategory)); +static cl::opt + PrintFixRelaxations("print-fix-relaxations", + cl::desc("print functions after fix relaxations pass"), + cl::cat(BoltOptCategory)); + static cl::opt PrintVeneerElimination( "print-veneer-elimination", cl::desc("print functions after veneer elimination pass"), @@ -315,9 +321,12 @@ Manager.registerPass(std::make_unique(), opts::AsmDump.getNumOccurrences()); - if (BC.isAArch64()) + if (BC.isAArch64()) { + Manager.registerPass(std::make_unique(PrintFixRelaxations)); + Manager.registerPass( std::make_unique(PrintVeneerElimination)); + } // Here we manage dependencies/order manually, since passes are run in the // order they're registered. diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1882,7 +1882,7 @@ } // anonymous namespace bool RewriteInstance::analyzeRelocation( - const RelocationRef &Rel, uint64_t RType, std::string &SymbolName, + const RelocationRef &Rel, uint64_t &RType, std::string &SymbolName, bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, uint64_t &ExtractedValue, bool &Skip) const { Skip = false; @@ -2554,7 +2554,8 @@ } if (ForceRelocation) { - std::string Name = Relocation::isGOT(RType) ? "Zero" : SymbolName; + std::string Name = + Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName; ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); SymbolAddress = 0; if (Relocation::isGOT(RType)) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -63,6 +63,10 @@ return Inst.getOpcode() == AArch64::ADR; } + bool isAddXri(const MCInst &Inst) const { + return Inst.getOpcode() == AArch64::ADDXri; + } + void getADRReg(const MCInst &Inst, MCPhysReg &RegName) const override { assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction"); assert(MCPlus::getNumPrimeOperands(Inst) != 0 && @@ -365,12 +369,11 @@ // Auto-select correct operand number if (OpNum == 0) { - if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst)) + if (isConditionalBranch(Inst) || isADR(Inst) || isADRP(Inst) || + isMOVW(Inst)) OpNum = 1; - if (isTB(Inst)) + if (isTB(Inst) || isAddXri(Inst)) OpNum = 2; - if (isMOVW(Inst)) - OpNum = 1; } return true; @@ -1072,6 +1075,19 @@ return 3; } + bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const override { + if (!isADRP(Adrp) || !isAddXri(Add)) + return false; + + assert(Adrp.getOperand(0).isReg() && + "Unexpected operand in ADRP instruction"); + MCPhysReg AdrpReg = Adrp.getOperand(0).getReg(); + assert(Add.getOperand(1).isReg() && + "Unexpected operand in ADDXri instruction"); + MCPhysReg AddReg = Add.getOperand(1).getReg(); + return AdrpReg == AddReg; + } + bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, int64_t Addend, MCContext *Ctx, int64_t &Value, uint64_t RelType) const override { diff --git a/bolt/test/AArch64/got-ld64-relaxation.c b/bolt/test/AArch64/got-ld64-relaxation.c new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/got-ld64-relaxation.c @@ -0,0 +1,26 @@ +// This test checks that ADR+LDR instruction sequence relaxed by the linker +// to the ADR+ADD sequence is properly reconized and handled by bolt + +// RUN: %clang -c %cflags -fPIE %s -o %t.o +// RUN: %clang %cflags -fPIE -Wl,-q -pie %t.o %p/../Inputs/asm_foo.s -o %t.exe +// RUN: llvm-objdump -dr %t.exe | FileCheck --check-prefix=CHECKORIG %s +// RUN: llvm-bolt %t.exe -o /dev/null --print-fix-relaxations \ +// RUN: --print-only=_start | FileCheck %s + +// CHECKORIG: adrp +// CHECKORIG-NEXT: R_AARCH64_ADR_GOT_PAGE foo +// CHECKORIG-NEXT: add +// CHECKORIG-NEXT: R_AARCH64_LD64_GOT_LO12_NC foo + +// CHECK: adrp x[[#%x,REG:]], foo +// CHECK-NEXT: add x[[#REG]], x[[#REG]], :lo12:foo + +void foo(void (*Foo)()); + +void _start() { + void (*Foo)() = foo; + foo(Foo); + asm volatile(".rept 262146\n" + " add x0, x0, #1\n" + ".endr\n"); +}