diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -39,6 +39,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -641,6 +642,10 @@ /// special linux kernel sections std::unordered_map> LKMarkers; + /// List of external addresses in the code that are not a function start + /// and are referenced from BinaryFunction. + std::list> InterproceduralReferences; + /// PseudoProbe decoder MCPseudoProbeDecoder ProbeDecoder; @@ -884,8 +889,23 @@ bool registerFragment(BinaryFunction &TargetFunction, BinaryFunction &Function) const; - /// Resolve inter-procedural dependencies from \p Function. - void processInterproceduralReferences(BinaryFunction &Function); + /// Add unterprocedural reference for \p Function to \p Address + void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) { + InterproceduralReferences.push_back({Function, Address}); + } + + /// Used to fix the target of linker-generated AArch64 adrp + add + /// sequence with no relocation info. + void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits, + MCInst &LoadHiBits, uint64_t Target); + + /// Return true if AARch64 veneer was successfully matched at a given + /// \p Address and register veneer binary function if \p MatchOnly + /// argument is false. + bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false); + + /// Resolve inter-procedural dependencies from + void processInterproceduralReferences(); /// Skip functions with all parent and child fragments transitively. void skipMarkedFragments(); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -253,10 +253,6 @@ std::unique_ptr BLI; - /// Set of external addresses in the code that are not a function start - /// and are referenced from this function. - std::set InterproceduralReferences; - /// All labels in the function that are referenced via relocations from /// data objects. Typically these are jump table destinations and computed /// goto labels. diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1384,7 +1384,7 @@ llvm_unreachable("not implemented"); } - /// Return true if the instruction CurInst, in combination with the recent + /// Return not 0 if the instruction CurInst, in combination with the recent /// history of disassembled instructions supplied by [Begin, End), is a linker /// generated veneer/stub that needs patching. This happens in AArch64 when /// the code is large and the linker needs to generate stubs, but it does @@ -1394,11 +1394,14 @@ /// is put in TgtLowBits, and its pair in TgtHiBits. If the instruction in /// TgtHiBits does not have an immediate operand, but an expression, then /// this expression is put in TgtHiSym and Tgt only contains the lower bits. - virtual bool matchLinkerVeneer(InstructionIterator Begin, - InstructionIterator End, uint64_t Address, - const MCInst &CurInst, MCInst *&TargetHiBits, - MCInst *&TargetLowBits, - uint64_t &Target) const { + /// Return value is a total number of instructions that were used to create + /// a veneer. + virtual uint64_t matchLinkerVeneer(InstructionIterator Begin, + InstructionIterator End, uint64_t Address, + const MCInst &CurInst, + MCInst *&TargetHiBits, + MCInst *&TargetLowBits, + uint64_t &Target) const { llvm_unreachable("not implemented"); } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -416,7 +416,7 @@ BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); } } else { - BF.InterproceduralReferences.insert(Address); + addInterproceduralReference(&BF, Address); } } @@ -1121,8 +1121,105 @@ return true; } -void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { - for (uint64_t Address : Function.InterproceduralReferences) { +void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, + MCInst &LoadLowBits, + MCInst &LoadHiBits, + uint64_t Target) { + const MCSymbol *TargetSymbol; + uint64_t Addend = 0; + std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, + /*IsPCRel*/ true); + int64_t Val; + MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, + ELF::R_AARCH64_ADR_PREL_PG_HI21); + MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), + Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); +} + +bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { + BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); + if (TargetFunction) + return false; + + ErrorOr Section = getSectionForAddress(Address); + assert(Section && "cannot get section for referenced address"); + if (!Section->isText()) + return false; + + bool Ret = false; + StringRef SectionContents = Section->getContents(); + uint64_t Offset = Address - Section->getAddress(); + const uint64_t MaxSize = SectionContents.size() - Offset; + const uint8_t *Bytes = + reinterpret_cast(SectionContents.data()); + ArrayRef Data(Bytes + Offset, MaxSize); + + auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, + MCInst &Instruction, uint64_t Offset, + uint64_t AbsoluteInstrAddr, + uint64_t TotalSize) -> bool { + MCInst *TargetHiBits, *TargetLowBits; + uint64_t TargetAddress, Count; + Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), + AbsoluteInstrAddr, Instruction, TargetHiBits, + TargetLowBits, TargetAddress); + if (!Count) + return false; + + if (MatchOnly) + return true; + + // NOTE The target symbol was created during disassemble's + // handleExternalReference + const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); + BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), + *Section, Address, TotalSize); + addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, + TargetAddress); + MIB->addAnnotation(Instruction, "AArch64Veneer", true); + Veneer->addInstruction(Offset, std::move(Instruction)); + --Count; + for (auto It = std::prev(Instructions.end()); Count != 0; + It = std::prev(It), --Count) { + MIB->addAnnotation(It->second, "AArch64Veneer", true); + Veneer->addInstruction(It->first, std::move(It->second)); + } + + Veneer->getOrCreateLocalLabel(Address); + Veneer->setMaxSize(TotalSize); + Veneer->updateState(BinaryFunction::State::Disassembled); + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address + << "\n"); + return true; + }; + + uint64_t Size = 0, TotalSize = 0; + BinaryFunction::InstrMapType VeneerInstructions; + for (Offset = 0; Offset < MaxSize; Offset += Size) { + MCInst Instruction; + const uint64_t AbsoluteInstrAddr = Address + Offset; + if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), + AbsoluteInstrAddr, nulls())) + break; + + TotalSize += Size; + if (MIB->isBranch(Instruction)) { + Ret = matchVeneer(VeneerInstructions, Instruction, Offset, + AbsoluteInstrAddr, TotalSize); + break; + } + + VeneerInstructions.emplace(Offset, std::move(Instruction)); + } + + return Ret; +} + +void BinaryContext::processInterproceduralReferences() { + for (const std::pair &It : + InterproceduralReferences) { + BinaryFunction &Function = *It.first; + uint64_t Address = It.second; if (!Address) continue; @@ -1132,7 +1229,7 @@ continue; if (TargetFunction) { - if (TargetFunction->IsFragment && + if (TargetFunction->isFragment() && !registerFragment(*TargetFunction, Function)) { errs() << "BOLT-WARNING: interprocedural reference between unrelated " "fragments: " @@ -1158,6 +1255,10 @@ if (SectionName == ".plt" || SectionName == ".plt.got") continue; + // Check if it is aarch64 veneer written at Address + if (isAArch64() && handleAArch64Veneer(Address)) + continue; + if (opts::processAllFunctions()) { errs() << "BOLT-ERROR: cannot process binaries with unmarked " << "object in code at address 0x" << Twine::utohexstr(Address) @@ -1178,7 +1279,7 @@ } } - clearList(Function.InterproceduralReferences); + InterproceduralReferences.clear(); } void BinaryContext::postProcessSymbolTable() { diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1061,27 +1061,12 @@ Instruction, Expr, *BC.Ctx, 0))); }; - // Used to fix the target of linker-generated AArch64 stubs with no relocation - // info - auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits, - uint64_t Target) { - const MCSymbol *TargetSymbol; - uint64_t Addend = 0; - std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true); - - int64_t Val; - MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), - Val, ELF::R_AARCH64_ADR_PREL_PG_HI21); - MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), - Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); - }; - auto handleExternalReference = [&](MCInst &Instruction, uint64_t Size, uint64_t Offset, uint64_t TargetAddress, bool &IsCall) -> MCSymbol * { const uint64_t AbsoluteInstrAddr = getAddress() + Offset; MCSymbol *TargetSymbol = nullptr; - InterproceduralReferences.insert(TargetAddress); + BC.addInterproceduralReference(this, TargetAddress); if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) { errs() << "BOLT-WARNING: relaxed tail call detected at 0x" << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this @@ -1147,7 +1132,7 @@ HasFixedIndirectBranch = true; } else { MIB->convertJmpToTailCall(Instruction); - InterproceduralReferences.insert(IndirectTarget); + BC.addInterproceduralReference(this, IndirectTarget); } break; } @@ -1164,19 +1149,20 @@ auto handleAArch64IndirectCall = [&](MCInst &Instruction, uint64_t Offset) { const uint64_t AbsoluteInstrAddr = getAddress() + Offset; MCInst *TargetHiBits, *TargetLowBits; - uint64_t TargetAddress; - if (MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), - AbsoluteInstrAddr, Instruction, TargetHiBits, - TargetLowBits, TargetAddress)) { + uint64_t TargetAddress, Count; + Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), + AbsoluteInstrAddr, Instruction, TargetHiBits, + TargetLowBits, TargetAddress); + if (Count) { MIB->addAnnotation(Instruction, "AArch64Veneer", true); - - uint8_t Counter = 0; - for (auto It = std::prev(Instructions.end()); Counter != 2; - --It, ++Counter) { + --Count; + for (auto It = std::prev(Instructions.end()); Count != 0; + It = std::prev(It), --Count) { MIB->addAnnotation(It->second, "AArch64Veneer", true); } - fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress); + BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits, + TargetAddress); } }; @@ -1295,7 +1281,9 @@ TargetSymbol = getOrCreateLocalLabel(TargetAddress); } else { if (TargetAddress == getAddress() + getSize() && - TargetAddress < getAddress() + getMaxSize()) { + TargetAddress < getAddress() + getMaxSize() && + !(BC.isAArch64() && + BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) { // Result of __builtin_unreachable(). LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x" << Twine::utohexstr(AbsoluteInstrAddr) @@ -2842,7 +2830,6 @@ clearList(Instructions); clearList(IgnoredBranches); clearList(TakenBranches); - clearList(InterproceduralReferences); if (BC.HasRelocations) { for (std::pair &LI : Labels) @@ -4416,17 +4403,20 @@ } bool BinaryFunction::isAArch64Veneer() const { - if (BasicBlocks.size() != 1) + if (empty()) return false; BinaryBasicBlock &BB = **BasicBlocks.begin(); - if (BB.size() != 3) - return false; - for (MCInst &Inst : BB) if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer")) return false; + for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) { + for (MCInst &Inst : **I) + if (!BC.MIB->isNoop(Inst)) + return false; + } + return true; } diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1595,6 +1595,9 @@ } void StripRepRet::runOnFunctions(BinaryContext &BC) { + if (!BC.isX86()) + return; + uint64_t NumPrefixesRemoved = 0; uint64_t NumBytesSaved = 0; for (auto &BFI : BC.getBinaryFunctions()) { diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp --- a/bolt/lib/Passes/VeneerElimination.cpp +++ b/bolt/lib/Passes/VeneerElimination.cpp @@ -60,8 +60,8 @@ } } - LLVM_DEBUG(dbgs() << "BOLT-INFO: number of removed linker-inserted veneers :" - << VeneersCount << "\n"); + outs() << "BOLT-INFO: number of removed linker-inserted veneers :" + << VeneersCount << "\n"; // Handle veneers to veneers in case they occur for (auto entry : VeneerDestinations) { diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -312,6 +312,10 @@ Manager.registerPass(std::make_unique(), opts::AsmDump.getNumOccurrences()); + if (BC.isAArch64()) + Manager.registerPass( + std::make_unique(PrintVeneerElimination)); + if (opts::Instrument) Manager.registerPass(std::make_unique(NeverPrint)); @@ -338,10 +342,6 @@ Manager.registerPass(std::make_unique(PrintICF), opts::ICF); - if (BC.isAArch64()) - Manager.registerPass( - std::make_unique(PrintVeneerElimination)); - Manager.registerPass( std::make_unique(NeverPrint, opts::SpecializeMemcpy1), !opts::SpecializeMemcpy1.empty()); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2891,10 +2891,9 @@ if (opts::PrintAll || opts::PrintDisasm) Function.print(outs(), "after disassembly", true); - - BC->processInterproceduralReferences(Function); } + BC->processInterproceduralReferences(); BC->clearJumpTableOffsets(); BC->populateJumpTables(); BC->skipMarkedFragments(); diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1032,17 +1032,17 @@ /// ADD x16, x16, imm /// BR x16 /// - bool matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, - uint64_t Address, const MCInst &CurInst, - MCInst *&TargetHiBits, MCInst *&TargetLowBits, - uint64_t &Target) const override { + uint64_t matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, + uint64_t Address, const MCInst &CurInst, + MCInst *&TargetHiBits, MCInst *&TargetLowBits, + uint64_t &Target) const override { if (CurInst.getOpcode() != AArch64::BR || !CurInst.getOperand(0).isReg() || CurInst.getOperand(0).getReg() != AArch64::X16) - return false; + return 0; auto I = End; if (I == Begin) - return false; + return 0; --I; Address -= 4; @@ -1051,7 +1051,7 @@ !I->getOperand(1).isReg() || I->getOperand(0).getReg() != AArch64::X16 || I->getOperand(1).getReg() != AArch64::X16 || !I->getOperand(2).isImm()) - return false; + return 0; TargetLowBits = &*I; uint64_t Addr = I->getOperand(2).getImm() & 0xFFF; @@ -1060,12 +1060,12 @@ if (I->getOpcode() != AArch64::ADRP || MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() || !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16) - return false; + return 0; TargetHiBits = &*I; Addr |= (Address + ((int64_t)I->getOperand(1).getImm() << 12)) & 0xFFFFFFFFFFFFF000ULL; Target = Addr; - return true; + return 3; } bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, @@ -1107,8 +1107,6 @@ bool isPrefix(const MCInst &Inst) const override { return false; } - bool deleteREPPrefix(MCInst &Inst) const override { return false; } - bool createReturn(MCInst &Inst) const override { Inst.setOpcode(AArch64::RET); Inst.clear(); diff --git a/bolt/test/AArch64/veneer-gold.s b/bolt/test/AArch64/veneer-gold.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/veneer-gold.s @@ -0,0 +1,75 @@ +// This test checks that the gold linker style veneer are properly handled +// by BOLT. +// Strip .rela.mytext section to simulate inserted by a linker veneers +// that does not contain relocations. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe \ +# RUN: -nostartfiles -nodefaultlibs -Wl,-z,notext \ +# RUN: -fuse-ld=lld -Wl,--no-relax -Wl,-q +# RUN: llvm-objcopy --remove-section .rela.mytext %t.exe +# RUN: llvm-objdump -d -j .mytext %t.exe | \ +# RUN: FileCheck --check-prefix=CHECKVENEER %s +# RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true \ +# RUN: --lite=0 --use-old-text=0 +# RUN: llvm-objdump -d -j .text %t.bolt | FileCheck %s + +.text +.balign 4 +.global dummy +.type dummy, %function +dummy: + adrp x1, dummy + ret +.size dummy, .-dummy + +.section ".mytext", "ax" +.balign 4 +.global foo +.type foo, %function +foo: +# CHECK: : +# CHECK-NEXT : {{.*}} bl {{.*}} + bl .L2 + ret +.size foo, .-foo + +.global foo2 +.type foo2, %function +foo2: +# CHECK: : +# CHECK-NEXT : {{.*}} bl {{.*}} + bl .L2 + ret +.size foo2, .-foo2 + +.global _start +.type _start, %function +_start: +# CHECK: <_start>: +# CHECK-NEXT: {{.*}} bl {{.*}} + bl .L1 + adr x0, .L0 + ret +.L0: + .xword 0 +.size _start, .-_start +.L1: +# CHECKVENEER: adrp +# CHECKVENEER-NEXT: add +# CHECKVENEER-NEXT: br +# CHECKVENEER-NEXT: nop + adrp x16, foo + add x16, x16, #:lo12:foo + br x16 + nop +.L2: +# CHECKVENEER-NEXT: adrp +# CHECKVENEER-NEXT: add +# CHECKVENEER-NEXT: br +# CHECKVENEER-NEXT: nop + adrp x16, foo2 + add x16, x16, #:lo12:foo2 + br x16 + nop diff --git a/bolt/test/AArch64/veneer.s b/bolt/test/AArch64/veneer.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/veneer.s @@ -0,0 +1,38 @@ +// This test checks that the veneer are properly handled by BOLT. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe \ +# RUN: -nostartfiles -nodefaultlibs -Wl,-z,notext \ +# RUN: -fuse-ld=lld -Wl,--no-relax +# RUN: llvm-objdump -d --disassemble-symbols='myveneer' %t.exe | \ +# RUN: FileCheck --check-prefix=CHECKVENEER %s +# RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true --lite=0 +# RUN: llvm-objdump -d --disassemble-symbols='_start' %t.bolt | FileCheck %s + +.text +.balign 4 +.global foo +.type foo, %function +foo: + ret +.size foo, .-foo + +.global myveneer +.type myveneer, %function +myveneer: +# CHECKVENEER: adrp +# CHECKVENEER-NEXT: add + adrp x16, foo + add x16, x16, #:lo12:foo + br x16 + nop +.size myveneer, .-myveneer + +.global _start +.type _start, %function +_start: +# CHECK: {{.*}} bl {{.*}} + bl myveneer + ret +.size _start, .-_start