Index: ELF/AArch64ErrataFix.h =================================================================== --- ELF/AArch64ErrataFix.h +++ ELF/AArch64ErrataFix.h @@ -12,11 +12,39 @@ #include "lld/Common/LLVM.h" +#include +#include + namespace lld { namespace elf { +class Defined; +class InputSection; +class InputSectionDescription; class OutputSection; -void reportA53Errata843419Fixes(); +class Patch843419Section; + +class AArch64Err843419Patcher { +public: + // return true if Patches have been added to the OutputSections. + bool createFixes(); + +private: + std::vector + patchInputSectionDescription(InputSectionDescription &ISD); + + void insertPatches(InputSectionDescription &ISD, + std::vector &Patches); + + void init(); + + // A cache of the mapping symbols defined by the InputSection sorted in order + // of ascending value with redundant symbols removed. These describe + // the ranges of code and data in an executable InputSection. + std::map> SectionMap; + + bool Initialized = false; +}; } // namespace elf } // namespace lld Index: ELF/AArch64ErrataFix.cpp =================================================================== --- ELF/AArch64ErrataFix.cpp +++ ELF/AArch64ErrataFix.cpp @@ -23,9 +23,6 @@ // - We can place the replacement sequence within range of the branch. // FIXME: -// - At this stage the implementation only supports detection and not fixing, -// this is sufficient to test the decode and recognition of the erratum -// sequence. // - The implementation here only supports one patch, the AArch64 Cortex-53 // errata 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core. 
// To keep the initial version simple there is no support for multiple @@ -336,13 +333,6 @@ isLoadStoreRegisterUnsigned(Instr4) && getRn(Instr4) == Rn; } -static void report843419Fix(uint64_t AdrpAddr) { - if (!Config->Verbose) - return; - message("detected cortex-a53-843419 erratum sequence starting at " + - utohexstr(AdrpAddr) + " in unpatched output."); -} - // Scan the instruction sequence starting at Offset Off from the base of // InputSection IS. We update Off in this function rather than in the caller as // we can skip ahead much further into the section when we know how many @@ -385,16 +375,66 @@ return PatchOff; } -// The AArch64 ABI permits data in executable sections. We must avoid scanning -// this data as if it were instructions to avoid false matches. -// The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe -// half open intervals [Symbol Value, Next Symbol Value) of code and data -// within sections. If there is no next symbol then the half open interval is -// [Symbol Value, End of section). The type, code or data, is determined by the -// mapping symbol name, $x for code, $d for data. -std::map> static makeAArch64SectionMap() { - std::map> SectionMap; +class lld::elf::Patch843419Section : public SyntheticSection { +public: + Patch843419Section(InputSection *P, uint64_t Off); + + void writeTo(uint8_t *Buf) override; + + size_t getSize() const override { return 8; } + + uint64_t getLDSTAddr() const; + + // The Section we are patching. + const InputSection *Patchee; + // The offset of the instruction in the Patchee section we are patching. + uint64_t PatcheeOffset; + // A label for the start of the Patch that we can use as a relocation target. 
+ Symbol *PatchSym; +}; + +lld::elf::Patch843419Section::Patch843419Section(InputSection *P, uint64_t Off) + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, + ".text.patch"), + Patchee(P), PatcheeOffset(Off) { + this->Parent = P->getParent(); + PatchSym = addSyntheticLocal( + Saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, 0, + getSize(), this); + addSyntheticLocal(Saver.save("$x"), STT_NOTYPE, 0, 0, this); +} + +uint64_t lld::elf::Patch843419Section::getLDSTAddr() const { + return Patchee->getParent()->Addr + Patchee->OutSecOff + PatcheeOffset; +} + +void lld::elf::Patch843419Section::writeTo(uint8_t *Buf) { + // Copy the instruction that we will be replacing with a branch in the + // Patchee Section. + write32le(Buf, read32le(Patchee->Data.begin() + PatcheeOffset)); + + // Apply any relocation transferred from the original PatcheeSection. + // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc + // also adds OutSecOff so we need to subtract to avoid double counting. + this->relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + getSize()); + + // Return address is the next instruction after the one we have just copied. + uint64_t S = getLDSTAddr() + 4; + uint64_t P = PatchSym->getVA() + 4; + Target->relocateOne(Buf + 4, R_AARCH64_JUMP26, S - P); +} + +void AArch64Err843419Patcher::init() { + // The AArch64 ABI permits data in executable sections. We must avoid scanning + // this data as if it were instructions to avoid false matches. We use the + // mapping symbols in the InputObjects to identify this data, caching the + // results in SectionMap so we don't have to recalculate it each pass. + + // The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe + // half open intervals [Symbol Value, Next Symbol Value) of code and data + // within sections. If there is no next symbol then the half open interval is + // [Symbol Value, End of section). 
The type, code or data, is determined by + // the mapping symbol name, $x for code, $d for data. auto IsCodeMapSymbol = [](const Symbol *B) { return B->getName() == "$x" || B->getName().startswith("$x."); }; @@ -435,56 +475,174 @@ }), MapSyms.end()); } - return SectionMap; + Initialized = true; } -static void scanInputSectionDescription(std::vector &MapSyms, - InputSection *IS) { - // Use SectionMap to make sure we only scan code and not inline data. - // We have already sorted MapSyms in ascending order and removed - // consecutive mapping symbols of the same type. Our range of - // executable instructions to scan is therefore [CodeSym->Value, - // DataSym->Value) or [CodeSym->Value, section size). - auto CodeSym = llvm::find_if(MapSyms, [&](const Defined *MS) { - return MS->getName().startswith("$x"); - }); - - while (CodeSym != MapSyms.end()) { - auto DataSym = std::next(CodeSym); - uint64_t Off = (*CodeSym)->Value; - uint64_t Limit = - (DataSym == MapSyms.end()) ? IS->Data.size() : (*DataSym)->Value; - - while (Off < Limit) { - uint64_t StartAddr = IS->getParent()->Addr + IS->OutSecOff + Off; - if (scanCortexA53Errata843419(IS, Off, Limit)) - report843419Fix(StartAddr); +// Insert the PatchSections we have created back into the +// InputSectionDescription. As inserting patches alters the addresses of +// InputSections that follow them, we try and place the patches after all the +// executable sections, although we may need to insert them earlier if the +// InputSectionDescription is larger than the maximum branch range. +void AArch64Err843419Patcher::insertPatches( + InputSectionDescription &ISD, std::vector &Patches) { + uint64_t ISLimit; + uint64_t PrevISLimit = ISD.Sections.front()->OutSecOff; + uint64_t PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + + // Set the OutSecOff of patches to the place where we want to insert them. + // We use a similar strategy to Thunk placement. Place patches roughly + // every multiple of maximum branch range. 
+ auto PatchIt = Patches.begin(); + auto PatchEnd = Patches.end(); + for (const InputSection *IS : ISD.Sections) { + ISLimit = IS->OutSecOff + IS->getSize(); + if (ISLimit > PatchUpperBound) { + while (PatchIt != PatchEnd) { + if ((*PatchIt)->getLDSTAddr() >= PrevISLimit) + break; + (*PatchIt)->OutSecOff = PrevISLimit; + ++PatchIt; + } + PatchUpperBound = PrevISLimit + Target->ThunkSectionSpacing; + } + PrevISLimit = ISLimit; + } + for (; PatchIt != PatchEnd; ++PatchIt) { + (*PatchIt)->OutSecOff = ISLimit; + } + + // merge all patch sections. We use the OutSecOff assigned above to + // determine the insertion point. This is ok as we only merge into an + // InputSectionDescription once per pass, and at the end of the pass + // assignAddresses() will recalculate all the OutSecOff values. + std::vector Tmp; + Tmp.reserve(ISD.Sections.size() + Patches.size()); + auto MergeCmp = [](const InputSection *A, const InputSection *B) { + if (A->OutSecOff < B->OutSecOff) + return true; + if (A->OutSecOff == B->OutSecOff && isa(A) && + !isa(B)) + return true; + return false; + }; + std::merge(ISD.Sections.begin(), ISD.Sections.end(), Patches.begin(), + Patches.end(), std::back_inserter(Tmp), MergeCmp); + ISD.Sections = std::move(Tmp); +} + +// Given an erratum sequence that starts at address AdrpAddr, with an +// instruction that we need to patch at PatcheeOffset from the start of +// InputSection IS, create a Patch843419 Section and add it to the +// Patches that we need to insert. +static void implementPatch(uint64_t AdrpAddr, uint64_t PatcheeOffset, + InputSection *IS, + std::vector &Patches) { + // There may be a relocation at the same offset that we are patching. There + // are three cases that we need to consider. + // Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this + // instance of the erratum on a previous patch and altered the relocation. We + // have nothing more to do. + // Case 2: A load/store register (unsigned immediate) class relocation. 
There + // are two of these R_AARCH64_LDST64_ABS_LO12_NC and R_AARCH64_LD64_GOT_LO12_NC and + // they are both absolute. We need to add the same relocation to the patch, + // and replace the relocation with an R_AARCH64_JUMP26 branch relocation. + // Case 3: No relocation. We must create a new R_AARCH64_JUMP26 branch + // relocation at the offset. + auto RelIt = std::find_if( + IS->Relocations.begin(), IS->Relocations.end(), + [=](const Relocation &R) { return R.Offset == PatcheeOffset; }); + if (RelIt != IS->Relocations.end() && RelIt->Type == R_AARCH64_JUMP26) + return; + + if (Config->Verbose) + message("detected cortex-a53-843419 erratum sequence starting at " + + utohexstr(AdrpAddr) + " in unpatched output."); + + auto *PS = make(IS, PatcheeOffset); + Patches.push_back(PS); + + auto MakeRelToPatch = [](uint64_t Offset, Symbol *PatchSym) { + return Relocation{R_PC, R_AARCH64_JUMP26, Offset, 0, PatchSym}; + }; + + if (RelIt != IS->Relocations.end()) { + PS->Relocations.push_back( + {RelIt->Expr, RelIt->Type, 0, RelIt->Addend, RelIt->Sym}); + *RelIt = MakeRelToPatch(PatcheeOffset, PS->PatchSym); + } else + IS->Relocations.push_back(MakeRelToPatch(PatcheeOffset, PS->PatchSym)); +} + +// Scan all the instructions in InputSectionDescription, for each instance of +// the erratum sequence create a Patch843419Section. We return the list of +// Patch843419Sections that need to be applied to ISD. +std::vector +AArch64Err843419Patcher::patchInputSectionDescription( + InputSectionDescription &ISD) { + std::vector Patches; + for (InputSection *IS : ISD.Sections) { + // LLD doesn't use the erratum sequence in SyntheticSections. + if (isa(IS)) + continue; + // Use SectionMap to make sure we only scan code and not inline data. + // We have already sorted MapSyms in ascending order and removed consecutive + // mapping symbols of the same type. Our range of executable instructions to + // scan is therefore [CodeSym->Value, DataSym->Value) or [CodeSym->Value, + // section size). 
+ std::vector &MapSyms = SectionMap[IS]; + + auto CodeSym = llvm::find_if(MapSyms, [&](const Defined *MS) { + return MS->getName().startswith("$x"); + }); + + while (CodeSym != MapSyms.end()) { + auto DataSym = std::next(CodeSym); + uint64_t Off = (*CodeSym)->Value; + uint64_t Limit = + (DataSym == MapSyms.end()) ? IS->Data.size() : (*DataSym)->Value; + + while (Off < Limit) { + uint64_t StartAddr = IS->getParent()->Addr + IS->OutSecOff + Off; + if (uint64_t PatcheeOffset = scanCortexA53Errata843419(IS, Off, Limit)) + implementPatch(StartAddr, PatcheeOffset, IS, Patches); + } + if (DataSym == MapSyms.end()) + break; + CodeSym = std::next(DataSym); } - if (DataSym == MapSyms.end()) - break; - CodeSym = std::next(DataSym); } + return Patches; } -// Scan all the executable code in an AArch64 link to detect the Cortex-A53 -// erratum 843419. -// FIXME: The current implementation only scans for the erratum sequence, it -// does not attempt to fix it. -void lld::elf::reportA53Errata843419Fixes() { - std::map> SectionMap = - makeAArch64SectionMap(); +// For each InputSectionDescription make one pass over the executable sections +// looking for the erratum sequence; creating a synthetic Patch843419Section +// for each instance found. We insert these synthetic patch sections after the +// executable code in each InputSectionDescription. +// +// PreConditions: +// The Output and Input Sections have had their final addresses assigned. +// +// PostConditions: +// Returns true if at least one patch was added. The addresses of the +// Output and Input Sections may have been changed. +// Returns false if no patches were required and no changes were made. 
+bool AArch64Err843419Patcher::createFixes() { + if (Initialized == false) + init(); + bool AddressesChanged = false; for (OutputSection *OS : OutputSections) { if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR)) continue; for (BaseCommand *BC : OS->SectionCommands) if (auto *ISD = dyn_cast(BC)) { - for (InputSection *IS : ISD->Sections) { - // LLD doesn't use the erratum sequence in SyntheticSections. - if (isa(IS)) - continue; - scanInputSectionDescription(SectionMap[IS], IS); + std::vector Patches = + patchInputSectionDescription(*ISD); + if (!Patches.empty()) { + insertPatches(*ISD, Patches); + AddressesChanged = true; } } } + return AddressesChanged; } Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -1399,6 +1399,7 @@ // alter InputSection addresses we must converge to a fixed point. if (Target->NeedsThunks || Config->AndroidPackDynRelocs) { ThunkCreator TC; + AArch64Err843419Patcher A64P; bool Changed; do { Script->assignAddresses(); @@ -1408,7 +1409,7 @@ if (Config->FixCortexA53Errata843419) { if (Changed) Script->assignAddresses(); - reportA53Errata843419Fixes(); + Changed |= A64P.createFixes(); } if (InX::MipsGot) InX::MipsGot->updateAllocSize(); Index: test/ELF/aarch64-cortex-a53-843419-address.s =================================================================== --- test/ELF/aarch64-cortex-a53-843419-address.s +++ test/ELF/aarch64-cortex-a53-843419-address.s @@ -4,7 +4,8 @@ // RUN: .text : { *(.text) *(.text.*) *(.newisd) } \ // RUN: .text2 : { *.(newos) } \ // RUN: .data : { *(.data) } }" > %t.script -// RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck %s +// RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck -check-prefix=CHECK-PRINT %s +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t2 | FileCheck %s // Test cases for Cortex-A53 Erratum 843419 that involve interactions // between the 
generated patches and the address of sections. @@ -34,8 +35,12 @@ // symbols with the same type). // - We can ignore erratum sequences in multiple literal data ranges. -// CHECK: detected cortex-a53-843419 erratum sequence starting at FF8 in unpatched output. - +// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at FF8 in unpatched output. +// CHECK: t3_ff8_ldr: +// CHECK-NEXT: ff8: 20 00 00 d0 adrp x0, #24576 +// CHECK-NEXT: ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-NEXT: 1000: f9 0f 00 14 b #16356 +// CHECK-NEXT: 1004: c0 03 5f d6 ret .section .text.01, "ax", %progbits .balign 4096 .space 4096 - 8 @@ -52,7 +57,12 @@ // every symbol so we need to handle the case of $x $x. .local $x.999 $x.999: -// CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 1FFC in unpatched output. +// CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 1FFC in unpatched output. +// CHECK: t3_ffc_ldrsimd: +// CHECK-NEXT: 1ffc: 20 00 00 b0 adrp x0, #20480 +// CHECK-NEXT: 2000: 21 00 40 bd ldr s1, [x1] +// CHECK-NEXT: 2004: fa 0b 00 14 b #12264 +// CHECK-NEXT: 2008: c0 03 5f d6 ret .globl t3_ffc_ldrsimd .type t3_ffc_ldrsimd, %function .space 4096 - 12 @@ -84,8 +94,12 @@ .byte 0xf9 // Check that we can recognise the erratum sequence post literal data. -// CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 3FF8 in unpatched output. - +// CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 3FF8 in unpatched output. 
+// CHECK: t3_ffc_ldr: +// CHECK-NEXT: 3ff8: 00 00 00 f0 adrp x0, #12288 +// CHECK-NEXT: 3ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-NEXT: 4000: fd 03 00 14 b #4084 +// CHECK-NEXT: 4004: c0 03 5f d6 ret .space 4096 - 12 .globl t3_ffc_ldr .type t3_ffc_ldr, %function @@ -95,14 +109,29 @@ ldr x0, [x0, :got_lo12:dat] ret +// CHECK: __CortexA53843419_1000: +// CHECK-NEXT: 4fe4: 00 0c 40 f9 ldr x0, [x0, #24] +// CHECK-NEXT: 4fe8: 07 f0 ff 17 b #-16356 +// CHECK: __CortexA53843419_2004: +// CHECK-NEXT: 4fec: 02 0c 40 f9 ldr x2, [x0, #24] +// CHECK-NEXT: 4ff0: 06 f4 ff 17 b #-12264 +// CHECK: __CortexA53843419_4000: +// CHECK-NEXT: 4ff4: 00 0c 40 f9 ldr x0, [x0, #24] +// CHECK-NEXT: 4ff8: 03 fc ff 17 b #-4084 + .section .text.02, "ax", %progbits - .space 4096 - 12 + .space 4096 - 36 // Start a new InputSectionDescription (see Linker Script) so the // start address will be affected by any patches added to previous // InputSectionDescription. -// CHECK: detected cortex-a53-843419 erratum sequence starting at 4FFC in unpatched output. +// CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 4FFC in unpatched output +// CHECK: t3_ffc_str: +// CHECK-NEXT: 4ffc: 00 00 00 d0 adrp x0, #8192 +// CHECK-NEXT: 5000: 21 00 00 f9 str x1, [x1] +// CHECK-NEXT: 5004: fb 03 00 14 b #4076 +// CHECK-NEXT: 5008: c0 03 5f d6 ret .section .newisd, "ax", %progbits .globl t3_ffc_str @@ -112,13 +141,23 @@ str x1, [x1, #0] ldr x0, [x0, :got_lo12:dat] ret - .space 4096 - 20 + .space 4096 - 28 -// CHECK: detected cortex-a53-843419 erratum sequence starting at 5FF8 in unpatched output. +// CHECK: __CortexA53843419_5004: +// CHECK-NEXT: 5ff0: 00 0c 40 f9 ldr x0, [x0, #24] +// CHECK-NEXT: 5ff4: 05 fc ff 17 b #-4076 // Start a new OutputSection (see Linker Script) so the // start address will be affected by any patches added to previous // InputSectionDescription. 
+ +//CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 5FF8 in unpatched output +// CHECK: t3_ff8_str: +// CHECK-NEXT: 5ff8: 00 00 00 b0 adrp x0, #4096 +// CHECK-NEXT: 5ffc: 21 00 00 f9 str x1, [x1] +// CHECK-NEXT: 6000: 03 00 00 14 b #12 +// CHECK-NEXT: 6004: c0 03 5f d6 ret + .section .newos, "ax", %progbits .globl t3_ff8_str .type t3_ff8_str, %function @@ -132,6 +171,10 @@ _start: ret +// CHECK: __CortexA53843419_6000: +// CHECK-NEXT: 600c: 00 0c 40 f9 ldr x0, [x0, #24] +// CHECK-NEXT: 6010: fd ff ff 17 b #-12 + .data .globl dat dat: .word 0 Index: test/ELF/aarch64-cortex-a53-843419-recognize.s =================================================================== --- test/ELF/aarch64-cortex-a53-843419-recognize.s +++ test/ELF/aarch64-cortex-a53-843419-recognize.s @@ -1,30 +1,38 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck -check-prefix CHECK-PRINT %s - +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t2 | FileCheck %s -check-prefixes=CHECK,CHECK-FIX +// RUN: ld.lld -verbose %t.o -o %t3 +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t3 | FileCheck %s -check-prefixes=CHECK,CHECK-NOFIX // Test cases for Cortex-A53 Erratum 843419 // See ARM-EPM-048406 Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf // for full erratum details. // In Summary // 1.) -// ADRP (0xff8 or 0xffc) +// ADRP (0xff8 or 0xffc). // 2.) -// - load or store single register or either integer or vector registers -// - STP or STNP of either vector or vector registers -// - Advanced SIMD ST1 store instruction -// Must not write Rn -// 3.) optional instruction, can't be a branch, must not write Rn, may read Rn +// - load or store single register or either integer or vector registers. +// - STP or STNP of either vector or vector registers. +// - Advanced SIMD ST1 store instruction. +// - Must not write Rn. +// 3.) 
optional instruction, can't be a branch, must not write Rn, may read Rn. // 4.) A load or store instruction from the Load/Store register unsigned -// immediate class using Rn as the base register +// immediate class using Rn as the base register. // Each section contains a sequence of instructions that should be recognized // as erratum 843419. The test cases cover the major variations such as: -// adrp starts at 0xfff8 or 0xfffc -// Variations in instruction class for instruction 2 -// Optional instruction 3 present or not -// Load or store for instruction 4. +// - adrp starts at 0xfff8 or 0xfffc. +// - Variations in instruction class for instruction 2. +// - Optional instruction 3 present or not. +// - Load or store for instruction 4. // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21FF8 in unpatched output. +// CHECK: t3_ff8_ldr: +// CHECK-NEXT: 21ff8: e0 01 00 f0 adrp x0, #258048 +// CHECK-NEXT: 21ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-FIX: 22000: 03 b8 00 14 b #188428 +// CHECK-NOFIX: 22000: 00 00 40 f9 ldr x0, [x0] +// CHECK-NEXT: 22004: c0 03 5f d6 ret .section .text.01, "ax", %progbits .balign 4096 .globl t3_ff8_ldr @@ -37,6 +45,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23FF8 in unpatched output. +// CHECK: t3_ff8_ldrsimd: +// CHECK-NEXT: 23ff8: e0 01 00 b0 adrp x0, #249856 +// CHECK-NEXT: 23ffc: 21 00 40 bd ldr s1, [x1] +// CHECK-FIX: 24000: 05 b0 00 14 b #180244 +// CHECK-NOFIX: 24000: 02 04 40 f9 ldr x2, [x0, #8] +// CHECK-NEXT: 24004: c0 03 5f d6 ret .section .text.02, "ax", %progbits .balign 4096 .globl t3_ff8_ldrsimd @@ -49,6 +63,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 25FFC in unpatched output. 
+// CHECK: t3_ffc_ldrpost: +// CHECK-NEXT: 25ffc: c0 01 00 f0 adrp x0, #241664 +// CHECK-NEXT: 26000: 21 84 40 bc ldr s1, [x1], #8 +// CHECK-FIX: 26004: 06 a8 00 14 b #172056 +// CHECK-NOFIX: 26004: 03 08 40 f9 ldr x3, [x0, #16] +// CHECK-NEXT: 26008: c0 03 5f d6 ret .section .text.03, "ax", %progbits .balign 4096 .globl t3_ffc_ldrpost @@ -61,6 +81,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 27FF8 in unpatched output. +// CHECK: t3_ff8_strpre: +// CHECK-NEXT: 27ff8: c0 01 00 b0 adrp x0, #233472 +// CHECK-NEXT: 27ffc: 21 8c 00 bc str s1, [x1, #8]! +// CHECK-FIX: 28000: 09 a0 00 14 b #163876 +// CHECK-NOFIX: 28000: 02 00 40 f9 ldr x2, [x0] +// CHECK-NEXT: 28004: c0 03 5f d6 ret .section .text.04, "ax", %progbits .balign 4096 .globl t3_ff8_strpre @@ -73,6 +99,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 29FFC in unpatched output. +// CHECK: t3_ffc_str: +// CHECK-NEXT: 29ffc: bc 01 00 f0 adrp x28, #225280 +// CHECK-NEXT: 2a000: 42 00 00 f9 str x2, [x2] +// CHECK-FIX: 2a004: 0a 98 00 14 b #155688 +// CHECK-NOFIX: 2a004: 9c 07 00 f9 str x28, [x28, #8] +// CHECK-NEXT: 2a008: c0 03 5f d6 ret .section .text.05, "ax", %progbits .balign 4096 .globl t3_ffc_str @@ -85,6 +117,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2BFFC in unpatched output. +// CHECK: t3_ffc_strsimd: +// CHECK-NEXT: 2bffc: bc 01 00 b0 adrp x28, #217088 +// CHECK-NEXT: 2c000: 44 00 00 b9 str w4, [x2] +// CHECK-FIX: 2c004: 0c 90 00 14 b #147504 +// CHECK-NOFIX: 2c004: 84 0b 00 f9 str x4, [x28, #16] +// CHECK-NEXT: 2c008: c0 03 5f d6 ret .section .text.06, "ax", %progbits .balign 4096 .globl t3_ffc_strsimd @@ -97,6 +135,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2DFF8 in unpatched output. 
+// CHECK: t3_ff8_ldrunpriv: +// CHECK-NEXT: 2dff8: 9d 01 00 f0 adrp x29, #208896 +// CHECK-NEXT: 2dffc: 41 08 40 38 ldtrb w1, [x2] +// CHECK-FIX: 2e000: 0f 88 00 14 b #139324 +// CHECK-NOFIX: 2e000: bd 03 40 f9 ldr x29, [x29] +// CHECK-NEXT: 2e004: c0 03 5f d6 ret .section .text.07, "ax", %progbits .balign 4096 .globl t3_ff8_ldrunpriv @@ -109,7 +153,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2FFFC in unpatched output. - .section .text.08, "ax", %progbits +// CHECK: t3_ffc_ldur: +// CHECK-NEXT: 2fffc: 9d 01 00 b0 adrp x29, #200704 +// CHECK-NEXT: 30000: 42 40 40 b8 ldur w2, [x2, #4] +// CHECK-FIX: 30004: 10 80 00 14 b #131136 +// CHECK-NOFIX: 30004: bd 07 40 f9 ldr x29, [x29, #8] +// CHECK-NEXT: 30008: c0 03 5f d6 ret .balign 4096 .globl t3_ffc_ldur .type t3_ffc_ldur, %function @@ -121,6 +170,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 31FFC in unpatched output. +// CHECK: t3_ffc_sturh: +// CHECK-NEXT: 31ffc: 72 01 00 f0 adrp x18, #192512 +// CHECK-NEXT: 32000: 43 40 00 78 sturh w3, [x2, #4] +// CHECK-FIX: 32004: 12 78 00 14 b #122952 +// CHECK-NOFIX: 32004: 41 0a 40 f9 ldr x1, [x18, #16] +// CHECK-NEXT: 32008: c0 03 5f d6 ret .section .text.09, "ax", %progbits .balign 4096 .globl t3_ffc_sturh @@ -133,6 +188,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 33FF8 in unpatched output. +// CHECK: t3_ff8_literal: +// CHECK-NEXT: 33ff8: 72 01 00 b0 adrp x18, #184320 +// CHECK-NEXT: 33ffc: e3 ff ff 58 ldr x3, #-4 +// CHECK-FIX: 34000: 15 70 00 14 b #114772 +// CHECK-NOFIX: 34000: 52 02 40 f9 ldr x18, [x18] +// CHECK-NEXT: 34004: c0 03 5f d6 ret .section .text.10, "ax", %progbits .balign 4096 .globl t3_ff8_literal @@ -145,6 +206,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 35FFC in unpatched output. 
+// CHECK: t3_ffc_register: +// CHECK-NEXT: 35ffc: 4f 01 00 f0 adrp x15, #176128 +// CHECK-NEXT: 36000: 43 68 61 f8 ldr x3, [x2, x1] +// CHECK-FIX: 36004: 16 68 00 14 b #106584 +// CHECK-NOFIX: 36004: ea 05 40 f9 ldr x10, [x15, #8] +// CHECK-NEXT: 36008: c0 03 5f d6 ret .section .text.11, "ax", %progbits .balign 4096 .globl t3_ffc_register @@ -157,6 +224,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 37FF8 in unpatched output. +// CHECK: t3_ff8_stp: +// CHECK-NEXT: 37ff8: 50 01 00 b0 adrp x16, #167936 +// CHECK-NEXT: 37ffc: 61 08 00 a9 stp x1, x2, [x3] +// CHECK-FIX: 38000: 19 60 00 14 b #98404 +// CHECK-NOFIX: 38000: 0d 0a 40 f9 ldr x13, [x16, #16] +// CHECK-NEXT: 38004: c0 03 5f d6 ret .section .text.12, "ax", %progbits .balign 4096 .globl t3_ff8_stp @@ -169,6 +242,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 39FFC in unpatched output. +// CHECK: t3_ffc_stnp: +// CHECK-NEXT: 39ffc: 27 01 00 f0 adrp x7, #159744 +// CHECK-NEXT: 3a000: 61 08 00 a8 stnp x1, x2, [x3] +// CHECK-FIX: 3a004: 1a 58 00 14 b #90216 +// CHECK-NOFIX: 3a004: e9 00 40 f9 ldr x9, [x7] +// CHECK-NEXT: 3a008: c0 03 5f d6 ret .section .text.13, "ax", %progbits .balign 4096 .globl t3_ffc_stnp @@ -181,6 +260,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3BFFC in unpatched output. +// CHECK: t3_ffc_st1singlepost: +// CHECK-NEXT: 3bffc: 37 01 00 b0 adrp x23, #151552 +// CHECK-NEXT: 3c000: 20 70 82 4c st1 { v0.16b }, [x1], x2 +// CHECK-FIX: 3c004: 1c 50 00 14 b #82032 +// CHECK-NOFIX: 3c004: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK-NEXT: 3c008: c0 03 5f d6 ret .section .text.14, "ax", %progbits .balign 4096 .globl t3_ffc_st1singlepost @@ -193,6 +278,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3DFF8 in unpatched output. 
+// CHECK: t3_ff8_st1multiple: +// CHECK-NEXT: 3dff8: 17 01 00 f0 adrp x23, #143360 +// CHECK-NEXT: 3dffc: 20 a0 00 4c st1 { v0.16b, v1.16b }, [x1] +// CHECK-FIX: 3e000: 1f 48 00 14 b #73852 +// CHECK-NOFIX: 3e000: f8 0a 40 f9 ldr x24, [x23, #16] +// CHECK-NEXT: 3e004: c0 03 5f d6 ret .section .text.15, "ax", %progbits .balign 4096 .globl t3_ff8_st1multiple @@ -205,6 +296,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3FFF8 in unpatched output. +// CHECK: t4_ff8_ldr: +// CHECK-NEXT: 3fff8: 00 01 00 b0 adrp x0, #135168 +// CHECK-NEXT: 3fffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-NEXT: 40000: 42 00 00 8b add x2, x2, x0 +// CHECK-FIX: 40004: 20 40 00 14 b #65664 +// CHECK-NOFIX: 40004: 02 00 40 f9 ldr x2, [x0] +// CHECK-NEXT: 40008: c0 03 5f d6 ret .section .text.16, "ax", %progbits .balign 4096 .globl t4_ff8_ldr @@ -218,6 +316,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 41FFC in unpatched output. +// CHECK: t4_ffc_str: +// CHECK-NEXT: 41ffc: fc 00 00 f0 adrp x28, #126976 +// CHECK-NEXT: 42000: 42 00 00 f9 str x2, [x2] +// CHECK-NEXT: 42004: 20 00 02 cb sub x0, x1, x2 +// CHECK-FIX: 42008: 21 38 00 14 b #57476 +// CHECK-NOFIX: 42008: 9b 07 00 f9 str x27, [x28, #8] +// CHECK-NEXT: 4200c: c0 03 5f d6 ret .section .text.17, "ax", %progbits .balign 4096 .globl t4_ffc_str @@ -231,6 +336,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 43FF8 in unpatched output. +// CHECK: t4_ff8_stp: +// CHECK-NEXT: 43ff8: f0 00 00 b0 adrp x16, #118784 +// CHECK-NEXT: 43ffc: 61 08 00 a9 stp x1, x2, [x3] +// CHECK-NEXT: 44000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 44004: 24 30 00 14 b #49296 +// CHECK-NOFIX: 44004: 0e 0a 40 f9 ldr x14, [x16, #16] +// CHECK-NEXT: 44008: c0 03 5f d6 ret .section .text.18, "ax", %progbits .balign 4096 .globl t4_ff8_stp @@ -244,6 +356,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 45FF8 in unpatched output. 
+// CHECK: t4_ff8_stppre: +// CHECK-NEXT: 45ff8: d0 00 00 f0 adrp x16, #110592 +// CHECK-NEXT: 45ffc: 61 08 81 a9 stp x1, x2, [x3, #16]! +// CHECK-NEXT: 46000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 46004: 26 28 00 14 b #41112 +// CHECK-NOFIX: 46004: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-NEXT: 46008: c0 03 5f d6 ret .section .text.19, "ax", %progbits .balign 4096 .globl t4_ff8_stppre @@ -257,6 +376,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 47FF8 in unpatched output. +// CHECK: t4_ff8_stppost: +// CHECK-NEXT: 47ff8: d0 00 00 b0 adrp x16, #102400 +// CHECK-NEXT: 47ffc: 61 08 81 a8 stp x1, x2, [x3], #16 +// CHECK-NEXT: 48000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 48004: 28 20 00 14 b #32928 +// CHECK-NOFIX: 48004: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-NEXT: 48008: c0 03 5f d6 ret .section .text.20, "ax", %progbits .balign 4096 .globl t4_ff8_stppost @@ -270,6 +396,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 49FFC in unpatched output. +// CHECK: t4_ffc_stpsimd: +// CHECK-NEXT: 49ffc: b0 00 00 f0 adrp x16, #94208 +// CHECK-NEXT: 4a000: 61 08 00 ad stp q1, q2, [x3] +// CHECK-NEXT: 4a004: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 4a008: 29 18 00 14 b #24740 +// CHECK-NOFIX: 4a008: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-NEXT: 4a00c: c0 03 5f d6 ret .section .text.21, "ax", %progbits .balign 4096 .globl t4_ffc_stpsimd @@ -283,6 +416,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4BFFC in unpatched output. 
+// CHECK: t4_ffc_stnp: +// CHECK-NEXT: 4bffc: a7 00 00 b0 adrp x7, #86016 +// CHECK-NEXT: 4c000: 61 08 00 a8 stnp x1, x2, [x3] +// CHECK-NEXT: 4c004: 1f 20 03 d5 nop +// CHECK-FIX: 4c008: 2b 10 00 14 b #16556 +// CHECK-NOFIX: 4c008: ea 00 40 f9 ldr x10, [x7] +// CHECK-NEXT: 4c00c: c0 03 5f d6 ret .section .text.22, "ax", %progbits .balign 4096 .globl t4_ffc_stnp @@ -296,6 +436,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4DFFC in unpatched output. +// CHECK: t4_ffc_st1: +// CHECK-NEXT: 4dffc: 98 00 00 f0 adrp x24, #77824 +// CHECK-NEXT: 4e000: 20 70 00 4c st1 { v0.16b }, [x1] +// CHECK-NEXT: 4e004: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK-FIX: 4e008: 2d 08 00 14 b #8372 +// CHECK-NOFIX: 4e008: 18 ff 3f f9 str x24, [x24, #32760] +// CHECK-NEXT: 4e00c: c0 03 5f d6 ret .section .text.23, "ax", %progbits .balign 4096 .globl t4_ffc_st1 @@ -309,6 +456,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4FFF8 in unpatched output. 
+// CHECK: t3_ff8_ldr_once: +// CHECK-NEXT: 4fff8: 80 00 00 b0 adrp x0, #69632 +// CHECK-NEXT: 4fffc: 20 70 82 4c st1 { v0.16b }, [x1], x2 +// CHECK-FIX: 50000: 31 00 00 14 b #196 +// CHECK-NOFIX: 50000: 01 08 40 f9 ldr x1, [x0, #16] +// CHECK-NEXT: 50004: 02 08 40 f9 ldr x2, [x0, #16] +// CHECK-NEXT: 50008: c0 03 5f d6 ret .section .text.24, "ax", %progbits .balign 4096 .globl t3_ff8_ldr_once @@ -327,6 +481,79 @@ _start: ret +// CHECK-FIX: __CortexA53843419_22000: +// CHECK-FIX-NEXT: 5000c: 00 00 40 f9 ldr x0, [x0] +// CHECK-FIX-NEXT: 50010: fd 47 ff 17 b #-188428 +// CHECK-FIX: __CortexA53843419_24000: +// CHECK-FIX-NEXT: 50014: 02 04 40 f9 ldr x2, [x0, #8] +// CHECK-FIX-NEXT: 50018: fb 4f ff 17 b #-180244 +// CHECK-FIX: __CortexA53843419_26004: +// CHECK-FIX-NEXT: 5001c: 03 08 40 f9 ldr x3, [x0, #16] +// CHECK-FIX-NEXT: 50020: fa 57 ff 17 b #-172056 +// CHECK-FIX: __CortexA53843419_28000: +// CHECK-FIX-NEXT: 50024: 02 00 40 f9 ldr x2, [x0] +// CHECK-FIX-NEXT: 50028: f7 5f ff 17 b #-163876 +// CHECK-FIX: __CortexA53843419_2A004: +// CHECK-FIX-NEXT: 5002c: 9c 07 00 f9 str x28, [x28, #8] +// CHECK-FIX-NEXT: 50030: f6 67 ff 17 b #-155688 +// CHECK-FIX: __CortexA53843419_2C004: +// CHECK-FIX-NEXT: 50034: 84 0b 00 f9 str x4, [x28, #16] +// CHECK-FIX-NEXT: 50038: f4 6f ff 17 b #-147504 +// CHECK-FIX: __CortexA53843419_2E000: +// CHECK-FIX-NEXT: 5003c: bd 03 40 f9 ldr x29, [x29] +// CHECK-FIX-NEXT: 50040: f1 77 ff 17 b #-139324 +// CHECK-FIX: __CortexA53843419_30004: +// CHECK-FIX-NEXT: 50044: bd 07 40 f9 ldr x29, [x29, #8] +// CHECK-FIX-NEXT: 50048: f0 7f ff 17 b #-131136 +// CHECK-FIX: __CortexA53843419_32004: +// CHECK-FIX-NEXT: 5004c: 41 0a 40 f9 ldr x1, [x18, #16] +// CHECK-FIX-NEXT: 50050: ee 87 ff 17 b #-122952 +// CHECK-FIX: __CortexA53843419_34000: +// CHECK-FIX-NEXT: 50054: 52 02 40 f9 ldr x18, [x18] +// CHECK-FIX-NEXT: 50058: eb 8f ff 17 b #-114772 +// CHECK-FIX: __CortexA53843419_36004: +// CHECK-FIX-NEXT: 5005c: ea 05 40 f9 ldr x10, [x15, #8] +// 
CHECK-FIX-NEXT: 50060: ea 97 ff 17 b #-106584 +// CHECK-FIX: __CortexA53843419_38000: +// CHECK-FIX-NEXT: 50064: 0d 0a 40 f9 ldr x13, [x16, #16] +// CHECK-FIX-NEXT: 50068: e7 9f ff 17 b #-98404 +// CHECK-FIX: __CortexA53843419_3A004: +// CHECK-FIX-NEXT: 5006c: e9 00 40 f9 ldr x9, [x7] +// CHECK-FIX-NEXT: 50070: e6 a7 ff 17 b #-90216 +// CHECK-FIX: __CortexA53843419_3C004: +// CHECK-FIX-NEXT: 50074: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK-FIX-NEXT: 50078: e4 af ff 17 b #-82032 +// CHECK-FIX: __CortexA53843419_3E000: +// CHECK-FIX-NEXT: 5007c: f8 0a 40 f9 ldr x24, [x23, #16] +// CHECK-FIX-NEXT: 50080: e1 b7 ff 17 b #-73852 +// CHECK-FIX: __CortexA53843419_40004: +// CHECK-FIX-NEXT: 50084: 02 00 40 f9 ldr x2, [x0] +// CHECK-FIX-NEXT: 50088: e0 bf ff 17 b #-65664 +// CHECK-FIX: __CortexA53843419_42008: +// CHECK-FIX-NEXT: 5008c: 9b 07 00 f9 str x27, [x28, #8] +// CHECK-FIX-NEXT: 50090: df c7 ff 17 b #-57476 +// CHECK-FIX: __CortexA53843419_44004: +// CHECK-FIX-NEXT: 50094: 0e 0a 40 f9 ldr x14, [x16, #16] +// CHECK-FIX-NEXT: 50098: dc cf ff 17 b #-49296 +// CHECK-FIX: __CortexA53843419_46004: +// CHECK-FIX-NEXT: 5009c: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500a0: da d7 ff 17 b #-41112 +// CHECK-FIX: __CortexA53843419_48004: +// CHECK-FIX-NEXT: 500a4: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500a8: d8 df ff 17 b #-32928 +// CHECK-FIX: __CortexA53843419_4A008: +// CHECK-FIX-NEXT: 500ac: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500b0: d7 e7 ff 17 b #-24740 +// CHECK-FIX: __CortexA53843419_4C008: +// CHECK-FIX-NEXT: 500b4: ea 00 40 f9 ldr x10, [x7] +// CHECK-FIX-NEXT: 500b8: d5 ef ff 17 b #-16556 +// CHECK-FIX: __CortexA53843419_4E008: +// CHECK-FIX-NEXT: 500bc: 18 ff 3f f9 str x24, [x24, #32760] +// CHECK-FIX-NEXT: 500c0: d3 f7 ff 17 b #-8372 +// CHECK-FIX: __CortexA53843419_50000: +// CHECK-FIX-NEXT: 500c4: 01 08 40 f9 ldr x1, [x0, #16] +// CHECK-FIX-NEXT: 500c8: cf ff ff 17 b #-196 + .data .globl dat .globl dat2 Index: 
test/ELF/aarch64-cortex-a53-843419-thunk.s =================================================================== --- test/ELF/aarch64-cortex-a53-843419-thunk.s +++ test/ELF/aarch64-cortex-a53-843419-thunk.s @@ -4,6 +4,7 @@ // RUN: .text1 0x10000 : { *(.text.01) *(.text.02) *(.text.03) } \ // RUN: .text2 0x100000000 : { *(.text.04) } } " > %t.script // RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck -check-prefix=CHECK-PRINT %s +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t2 | FileCheck %s // Test cases for Cortex-A53 Erratum 843419 that involve interactions with // range extension thunks. Both erratum fixes and range extension thunks need @@ -33,6 +34,15 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 10FFC in unpatched output. +// CHECK: t3_ff8_ldr: +// CHECK-NEXT: 10ffc: 80 ff 7f 90 adrp x0, #4294901760 +// CHECK-NEXT: 11000: 21 00 40 f9 ldr x1, [x1] +// CHECK-NEXT: 11004: 02 00 00 14 b #8 +// CHECK-NEXT: 11008: c0 03 5f d6 ret +// CHECK: __CortexA53843419_11004: +// CHECK-NEXT: 1100c: 00 08 40 f9 ldr x0, [x0, #16] +// CHECK-NEXT: 11010: fe ff ff 17 b #-8 + .section .text.04, "ax", %progbits .globl far_away .type far_away, function