Index: ELF/SectionPatcher.h =================================================================== --- ELF/SectionPatcher.h +++ ELF/SectionPatcher.h @@ -10,12 +10,53 @@ #ifndef LLD_ELF_SECTIONPATCHER_H #define LLD_ELF_SECTIONPATCHER_H +#include "Relocations.h" #include "lld/Core/LLVM.h" namespace lld { namespace elf { class OutputSection; +class SymbolBody; +class InputSection; +class PatchSection; + +// Class to describe an instance of an errata fix for the Cortex-A53 cpu +// erratum 843419. This erratum affects early revisions of the Cortex-A53 when +// running in the AArch64 execution state. It occurs when a certain sequence +// of instructions occurs on a 4k page boundary. To work around this problem the +// linker will scan for these sequences and will replace one of the +// instructions with a branch to a thunk that will execute the instruction and +// return to the instruction after the one we replaced. The replacement of the +// instruction with a branch is sufficient to prevent the erratum. + +// FIXME: The implementation only supports one patch -fix-cortex-a53-843419. +// This can be generalized to support other errata that can be fixed with a +// branch to a replacement sequence. +// FIXME: Patches have some similarity to Thunks, and it may be profitable to +// merge them or derive from a common base class if a Target needs both Thunks +// and Patches. +class Patch843419 { +public: + Patch843419(InputSection *P, uint64_t Off); + void writeTo(uint8_t *Buf, PatchSection &PS) const; + void addSymbols(PatchSection &PS); + + uint64_t getLDSTAddr() const; + Relocation transformRelocation(const Relocation &R) const; + + size_t size() const { return 8; } + + // The Section we are patching. + const InputSection *Patchee; + // The Offset of the instruction in Patchee that we are patching. 
+ uint64_t PatcheeOffset; + // The Offset of the patch in PatchSection + uint64_t Offset; + // A label for the start of the Patch + SymbolBody *PatchSym; + uint64_t Alignment = 4; +}; void createA53Errata843419Fixes(ArrayRef OutputSections); Index: ELF/SectionPatcher.cpp =================================================================== --- ELF/SectionPatcher.cpp +++ ELF/SectionPatcher.cpp @@ -50,13 +50,54 @@ // errata 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core. // To keep the initial version simple there is no generic support for multiple // architectures or multiple patches. -// -// - At this stage the implementation only supports detection and not fixing, -// this is sufficient to test the decode and recognition of the erratum sequence // Helper functions that decode AArch64 A64 instructions needed for the // detection of the erratum sequence. The functions stand alone and can be // reused outside the context of detecting the erratum sequence. + +namespace lld { +namespace elf { + +Patch843419::Patch843419(InputSection *P, uint64_t Off) + : Patchee(P), PatcheeOffset(Off) {} + +// With reference to the sequence of 4 instructions that may trigger the +// erratum (see comment Scanner for Cortex-A53 843419) the Patch consists of: +// Instruction 4.) load/store register (unsigned immediate) +// Branch to next instruction after instruction 4. +void Patch843419::writeTo(uint8_t *Buf, PatchSection &PS) const { + // Copy Instruction 4. from the Patchee section. + // Any relocation in the Patchee at PatcheeOffset will have been + // transferred to the PatchSection + write32le(Buf, read32le(Patchee->Data.begin() + PatcheeOffset)); + + // Return Address is next instruction after the load/store register + // above. 
+ uint64_t S = getLDSTAddr() + 4; + uint64_t P = PatchSym->getVA() + 4; + write32le(Buf + 4, 0x14000000); + Target->relocateOne(Buf + 4, R_AARCH64_JUMP26, S - P); +} + +void Patch843419::addSymbols(PatchSection &PS) { + PatchSym = addSyntheticLocal( + Saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, + Offset, size(), &PS); +} + +uint64_t Patch843419::getLDSTAddr() const { + return Patchee->getParent()->Addr + Patchee->OutSecOff + PatcheeOffset; +} + +// If there is a Relocation that is applied to the LDST instruction, we must add an equivalent +// Relocation to the PatchSection at the same offset as the LDST instruction. +Relocation Patch843419::transformRelocation(const Relocation &R) const { + return {R.Expr, R.Type, Offset, R.Addend, R.Sym}; +} + +} // end namespace elf +} // end namespace lld + struct A64 { // ADRP 1 | immlo (2) | 1 | 0 0 0 0 | immhi (19) | Rd (5) @@ -280,7 +321,7 @@ // The instruction sequence is common in compiled AArch64 code, however it is // sensitive to address, which limits the number of times it has to be applied // and limits the amount of disassembly that we have to do. -// +// // The erratum conditions are in summary: // 1.) An ADRP instruction that writes to register Rn with low 12 bits of // address of instruction either 0xff8 or 0xffc @@ -340,12 +381,34 @@ static void report843419Fix(uint64_t AdrpAddr) { if (!Config->Verbose) return; + // In most cases (a single .text section) the AdrpAddr will be the same in + // the patched and unpatched output, however as the addition of a PatchSection + // might alter the address of subsequent sections this may be some multiple + // of 4k different in the patched version. 
message("detected cortex-a53-843419 erratum sequence starting at " + utohexstr(AdrpAddr) + " in unpatched output."); } -static void scanCortexA53Errata843419(InputSection *IS, uint64_t &Off, - uint64_t Size) { +static Patch843419 *makePatch843419(InputSection *Patchee, uint64_t ADRPOff, + uint64_t LDSTOff) { + uint64_t ISAddr = Patchee->getParent()->Addr + Patchee->OutSecOff; + report843419Fix(ISAddr + ADRPOff); + return make(Patchee, LDSTOff); +} + +// The erratum conditions are in summary: +// 1.) An ADRP instruction that writes to register Rn with low 12 bits of +// address of instruction either 0xff8 or 0xffc +// 2.) A load or store instruction that can be: +// - A single register load or store, of either integer or vector registers +// - An STP or STNP, of either integer or vector registers +// - An Advanced SIMD ST1 store instruction +// - Must not write to Rn, but may optionally read from it. +// 3.) An optional instruction that is not a branch and does not write to Rn +// 4.) A load or store from the Load/store register (unsigned immediate) class +// that uses Rn as the base address register +static Patch843419 *scanCortexA53Errata843419(InputSection *IS, uint64_t &Off, + uint64_t Size) { uint64_t ISAddr = IS->getParent()->Addr + IS->OutSecOff; const uint8_t *Buf = IS->Data.begin(); @@ -358,19 +421,20 @@ if (Off >= Size || Size - Off < 12) { // Need at least 3 instructions to detect sequence Off = Size; - return; + return nullptr; } + Patch843419 *Patch = nullptr; uint32_t Instr1 = *reinterpret_cast(Buf + Off); if (A64::isADRP(Instr1)) { uint32_t Instr2 = *reinterpret_cast(Buf + Off + 4); uint32_t Instr3 = *reinterpret_cast(Buf + Off + 8); if (is843419ErratumSequence(Instr1, Instr2, Instr3)) - report843419Fix(ISAddr + Off); + Patch = makePatch843419(IS, Off, Off + 8); else if (OptionalAllowed && !A64::isBranch(Instr3)) { uint32_t Instr4 = *reinterpret_cast(Buf + Off + 12); if (is843419ErratumSequence(Instr1, Instr2, Instr4)) - report843419Fix(ISAddr + Off); 
+ Patch = makePatch843419(IS, Off, Off + 12); } } if (((ISAddr + Off) & 0xfff) == 0xff8) @@ -378,6 +442,7 @@ else // Skip to next 0xff8 Off += 0xffc; + return Patch; } // The AArch64 ABI permits data in executable sections. We must avoid scanning @@ -435,10 +500,66 @@ return SectionMap; } +static PatchSection *getPatchSec(OutputSection *OS, + ArrayRef ISR, + PatchSection *CurPS) { + if (CurPS) + return CurPS; + + uint64_t Off = 0; + for (const InputSection *IS : ISR) { + if ((IS->Flags & SHF_EXECINSTR) == 0) + break; + Off = IS->OutSecOff + IS->getSize(); + } + return make(OS, Off, 0x1000); +} + +static void addPatchRelocations(InputSection *IS, PatchSection *PS, Patch843419 *Patch) { + auto RelIt = std::find_if(IS->Relocations.begin(), IS->Relocations.end(), + [=](const Relocation &R) { + return R.Offset == Patch->PatcheeOffset; + }); + if (RelIt != IS->Relocations.end()) { + // We have an existing relocation at the same offset as the instruction we want to patch. + // We must transfer the relocation to the PatchSection so that we get the same result. + PS->Relocations.push_back(Patch->transformRelocation(*RelIt)); + // Change our existing InputSection relocation to a branch to the patch. + *RelIt = {R_PC, R_AARCH64_JUMP26, Patch->PatcheeOffset, 0, + Patch->PatchSym}; + } else + // Create a new branch relocation to the patch. + IS->Relocations.push_back({R_PC, R_AARCH64_JUMP26, + Patch->PatcheeOffset, 0, Patch->PatchSym}); +} + // Scan all the executable code in an AArch64 link to detect the Cortex-A53 // erratum 843419. -// FIXME: The current implementation only scans for the erratum sequence, it -// does not attempt to fix it. +// +// The procedure for fixing erratum is: +// 1.) Disassemble code to find the erratum sequence. +// 2.) Create an instance of Patch843419 to store the location we need to patch, +// this also allows us to determine the return address. +// 3.) 
Find any relocation at the location we need to insert the branch +// as we may need to apply it to the PatchSection to get the correct result. +// We also need to check that we haven't already patched the +// instruction. +// 4.) Add patch to patch section to get a location for the patch that we can +// branch to. +// 5.) Replace or create a new relocation at the location we want a branch; +// for AArch64 the relocation will overwrite the existing instruction with +// a branch. +// 6.) Insert a PatchSection if it contains any patches. +// +// FIXME: The implementation is specific to AArch64 as we only have one +// supported fix. The Target specific parts will need extracting if we need +// to support other Targets. +// +// FIXME: The implementation runs in a single pass as by sizing the patch +// section to be a multiple of a page in size, adding a patch section doesn't +// invalidate any address calculations modulo page size in subsequent sections. +// If AArch64 needs to support range thunks or other patches then this may need +// to be made multi-pass or merged with the Thunk implementation. void lld::elf::createA53Errata843419Fixes( ArrayRef OutputSections) { std::map> SectionMap = @@ -449,6 +570,7 @@ continue; for (BaseCommand *BC : OS->Commands) if (auto *ISD = dyn_cast(BC)) { + PatchSection *CurPS = nullptr; for (InputSection *IS : ISD->Sections) { // LLD doesn't use the erratum sequence in SyntheticSections if (isa(IS)) @@ -466,13 +588,31 @@ uint64_t Limit = (Data == MapSyms.end()) ? IS->Data.size() : (*Data)->Value; - while (Off < Limit) - scanCortexA53Errata843419(IS, Off, Limit); + while (Off < Limit) { + if (Patch843419 *P = scanCortexA53Errata843419(IS, Off, Limit)) { + // We have an instance of the erratum with details in the patch. Add the + // patch to the PatchSection and add relocations to branch to the patch. + // On AArch64 the branch relocation implementation will convert the + // instruction to a branch. 
+ CurPS = getPatchSec(OS, ISD->Sections, CurPS); + CurPS->addPatch(P); + addPatchRelocations(IS, CurPS, P); + } + } if (Data == MapSyms.end()) break; Code = std::next(Data); } } + // We have at least one patch, insert the patch section + if (CurPS) + ISD->Sections.insert( + std::upper_bound(ISD->Sections.begin(), ISD->Sections.end(), + CurPS, + [&](InputSection *A, InputSection *B) { + return A->OutSecOff < B->OutSecOff; + }), + CurPS); } } } Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -740,6 +740,28 @@ size_t Size = 0; }; +// A container for one or more linker generated patches that prevent CPU +// errata. An example of a patch is the -fix-cortex-a53-843419. +// FIXME: There is some scope to merge PatchSection with ThunkSection which +// may be profitable if a patch is needed for a Target that needs Thunks. +struct Patch843419; +class PatchSection : public SyntheticSection { +public: + PatchSection(OutputSection *OS, uint64_t Off, size_t SizeAlign); + + void addPatch(Patch843419 *P); + void writeTo(uint8_t *Buf) override; + + size_t getSize() const override; + +private: + std::vector Patches; + size_t Size = 0; + + // The Section Size must be 0 modulo SizeMultiple + size_t SizeMultiple; +}; + template InputSection *createCommonSection(); InputSection *createInterpSection(); template MergeInputSection *createCommentSection(); Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -21,6 +21,7 @@ #include "LinkerScript.h" #include "Memory.h" #include "OutputSections.h" +#include "SectionPatcher.h" #include "Strings.h" #include "SymbolTable.h" #include "Target.h" @@ -2317,6 +2318,37 @@ return T->getTargetInputSection(); } +PatchSection::PatchSection(OutputSection *OS, uint64_t Off, size_t Multiple) + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, 
SHT_PROGBITS, 4, + ".text.patch"), SizeMultiple(Multiple) { + this->Parent = OS; + this->OutSecOff = Off; +} + +void PatchSection::addPatch(Patch843419 *P) { + uint64_t Off = alignTo(Size, P->Alignment); + P->Offset = Off; + Patches.push_back(P); + P->addSymbols(*this); + Size = Off + P->size(); +} + +size_t PatchSection::getSize() const { + // Some errata, such as Cortex-A53-843419, have conditions that are + // dependent on the offset of instructions within a page. By making the size + // of the patch section a multiple of the page size, it will not affect the + // instruction address modulo the page size of subsequent sections. + return alignTo(Size, SizeMultiple); +} + +void PatchSection::writeTo(uint8_t *Buf) { + for (const Patch843419 *P : Patches) + P->writeTo(Buf + P->Offset, *this); + // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc + // also adds OutSecOff so we need to account for it here. + this->relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + Size); +} + InputSection *InX::ARMAttributes; BssSection *InX::Bss; BssSection *InX::BssRelRo; Index: test/ELF/aarch64-cortex-a53-843419-address.s =================================================================== --- test/ELF/aarch64-cortex-a53-843419-address.s +++ test/ELF/aarch64-cortex-a53-843419-address.s @@ -5,6 +5,7 @@ // RUN: .text2 : { *.(newos) } \ // RUN: .data : { *(.data) } }" > %t.script // RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck %s +// RUN: llvm-objdump -d -triple=aarch64-linux-gnu %t2 | FileCheck %s -check-prefix=CHECK-FIX // Test cases for Cortex-A53 Erratum 843419 that involve interactions // between the generated patches and the address of sections @@ -35,6 +36,11 @@ // - We can ignore erratum sequences in multiple literal data ranges. // CHECK: detected cortex-a53-843419 erratum sequence starting at FF8 in unpatched output. 
+// CHECK-FIX: t3_ff8_ldr: +// CHECK-FIX-NEXT: ff8: 40 00 00 b0 adrp x0, #36864 +// CHECK-FIX-NEXT: ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-FIX-NEXT: 1000: ff 0f 00 14 b #16380 +// CHECK-FIX-NEXT: 1004: c0 03 5f d6 ret .section .text.01, "ax", %progbits .balign 4096 @@ -54,6 +60,11 @@ .local $x.999 $x.999: // CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 1FFC in unpatched output. +// CHECK-FIX: t3_ffc_ldrsimd: +// CHECK-FIX-NEXT: 1ffc: 40 00 00 90 adrp x0, #32768 +// CHECK-FIX-NEXT: 2000: 21 00 40 bd ldr s1, [x1] +// CHECK-FIX-NEXT: 2004: 00 0c 00 14 b #12288 +// CHECK-FIX-NEXT: 2008: c0 03 5f d6 ret .globl t3_ffc_ldrsimd .type t3_ffc_ldrsimd, %function .space 4096 - 12 @@ -86,6 +97,11 @@ // Check that we can recognise the erratum sequence post literal data // CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 3FF8 in unpatched output. +// CHECK-FIX: t3_ffc_ldr: +// CHECK-FIX-NEXT: 3ff8: 20 00 00 d0 adrp x0, #24576 +// CHECK-FIX-NEXT: 3ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-FIX-NEXT: 4000: 03 04 00 14 b #4108 +// CHECK-FIX-NEXT: 4004: c0 03 5f d6 ret .space 4096 - 12 .globl t3_ffc_ldr @@ -96,6 +112,16 @@ ldr x0, [x0, :got_lo12:dat] ret +// CHECK-FIX: __CortexA53843419_1000: +// CHECK-FIX-NEXT: 4ffc: 00 08 40 f9 ldr x0, [x0, #16] +// CHECK-FIX-NEXT: 5000: 01 f0 ff 17 b #-16380 +// CHECK-FIX: __CortexA53843419_2004: +// CHECK-FIX-NEXT: 5004: 02 08 40 f9 ldr x2, [x0, #16] +// CHECK-FIX-NEXT: 5008: 00 f4 ff 17 b #-12288 +// CHECK-FIX: __CortexA53843419_4000: +// CHECK-FIX-NEXT: 500c: 00 08 40 f9 ldr x0, [x0, #16] +// CHECK-FIX-NEXT: 5010: fd fb ff 17 b #-4108 + .section .text.02, "ax", %progbits .space 4096 - 12 @@ -104,6 +130,12 @@ // affected by any patches added to previous InputSectionDescription // CHECK: detected cortex-a53-843419 erratum sequence starting at 4FFC in unpatched output. 
+// The PatchSection above is the reason behind the 0x1000 gap between the +// message (unpatched) and output (patched) +// CHECK-FIX: t3_ffc_str: +// CHECK-FIX-NEXT: 5ffc: 20 00 00 90 adrp x0, #16384 +// CHECK-FIX-NEXT: 6000: 21 00 00 f9 str x1, [x1] +// CHECK-FIX-NEXT: 6004: fd 03 00 14 b #4084 .section .newisd, "ax", %progbits .globl t3_ffc_str @@ -115,6 +147,10 @@ ret .space 4096 - 20 +// CHECK-FIX: __CortexA53843419_5004: +// CHECK-FIX-NEXT: 6ff8: 00 08 40 f9 ldr x0, [x0, #16] +// CHECK-FIX-NEXT: 6ffc: 03 fc ff 17 b #-4084 + // CHECK: detected cortex-a53-843419 erratum sequence starting at 5FF8 in unpatched output. // Start a new OutputSection (see Linker Script) so the @@ -128,11 +164,21 @@ str x1, [x1, #0] ldr x0, [x0, :got_lo12:dat] ret +// The two patch sections above are the reason behind the 0x2000 gap between +// the message (unpatched) and output (patched) +// CHECK-FIX: t3_ff8_str: +// CHECK-FIX-NEXT: 7ff8: 00 00 00 d0 adrp x0, #8192 +// CHECK-FIX-NEXT: 7ffc: 21 00 00 f9 str x1, [x1] +// CHECK-FIX-NEXT: 8000: 03 00 00 14 b #12 +// CHECK-FIX-NEXT: 8004: c0 03 5f d6 ret .globl _start .type _start, %function _start: ret +// CHECK-FIX: __CortexA53843419_6000: +// CHECK-FIX-NEXT: 800c: 00 08 40 f9 ldr x0, [x0, #16] +// CHECK-FIX-NEXT: 8010: fd ff ff 17 b #-12 .data .globl dat dat: .word 0 Index: test/ELF/aarch64-cortex-a53-843419-recognize.s =================================================================== --- test/ELF/aarch64-cortex-a53-843419-recognize.s +++ test/ELF/aarch64-cortex-a53-843419-recognize.s @@ -1,6 +1,9 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck -check-prefix CHECK-PRINT %s +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t2 | FileCheck %s -check-prefixes=CHECK,CHECK-FIX +// RUN: ld.lld %t.o -o %t3 +// RUN: llvm-objdump -triple=aarch64-linux-gnu -d %t3 | FileCheck %s -check-prefixes=CHECK,CHECK-NOFIX // Test cases for 
Cortex-A53 Erratum 843419 // See ARM-EPM-048406 Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf @@ -25,6 +28,13 @@ // Load or store for instruction 4. // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21FF8 in unpatched output. +// CHECK: t3_ff8_ldr: +// CHECK-NEXT: 21ff8: e0 01 00 f0 adrp x0, #258048 +// CHECK-NEXT: 21ffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-FIX: 22000: 03 b8 00 14 b #188428 +// CHECK-NOFIX: 22000: 00 00 40 f9 ldr x0, [x0] +// CHECK: 22004: c0 03 5f d6 ret + .section .text.01, "ax", %progbits .balign 4096 .globl t3_ff8_ldr @@ -37,6 +47,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23FF8 in unpatched output. +// CHECK: t3_ff8_ldrsimd: +// CHECK-NEXT: 23ff8: e0 01 00 b0 adrp x0, #249856 +// CHECK-NEXT: 23ffc: 21 00 40 bd ldr s1, [x1] +// CHECK-FIX: 24000: 05 b0 00 14 b #180244 +// CHECK-NOFIX: 24000: 02 04 40 f9 ldr x2, [x0, #8] +// CHECK: 24004: c0 03 5f d6 ret .section .text.02, "ax", %progbits .balign 4096 .globl t3_ff8_ldrsimd @@ -49,6 +65,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 25FFC in unpatched output. +// CHECK: t3_ffc_ldrpost: +// CHECK-NEXT: 25ffc: c0 01 00 f0 adrp x0, #241664 +// CHECK-NEXT: 26000: 21 84 40 bc ldr s1, [x1], #8 +// CHECK-FIX: 26004: 06 a8 00 14 b #172056 +// CHECK-NOFIX: 26004: 03 08 40 f9 ldr x3, [x0, #16] +// CHECK: 26008: c0 03 5f d6 ret .section .text.03, "ax", %progbits .balign 4096 .globl t3_ffc_ldrpost @@ -61,6 +83,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 27FF8 in unpatched output. +// CHECK: t3_ff8_strpre: +// CHECK-NEXT: 27ff8: c0 01 00 b0 adrp x0, #233472 +// CHECK-NEXT: 27ffc: 21 8c 00 bc str s1, [x1, #8]! 
+// CHECK-FIX: 28000: 09 a0 00 14 b #163876 +// CHECK-NOFIX: 28000: 02 00 40 f9 ldr x2, [x0] +// CHECK: 28004: c0 03 5f d6 ret .section .text.04, "ax", %progbits .balign 4096 .globl t3_ff8_strpre @@ -73,6 +101,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 29FFC in unpatched output. +// CHECK: t3_ffc_str: +// CHECK-NEXT: 29ffc: bc 01 00 f0 adrp x28, #225280 +// CHECK-NEXT: 2a000: 42 00 00 f9 str x2, [x2] +// CHECK-FIX: 2a004: 0a 98 00 14 b #155688 +// CHECK-NOFIX: 2a004: 9c 07 00 f9 str x28, [x28, #8] +// CHECK: 2a008: c0 03 5f d6 ret .section .text.05, "ax", %progbits .balign 4096 .globl t3_ffc_str @@ -85,6 +119,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2BFFC in unpatched output. +// CHECK: t3_ffc_strsimd: +// CHECK-NEXT: 2bffc: bc 01 00 b0 adrp x28, #217088 +// CHECK-NEXT: 2c000: 44 00 00 b9 str w4, [x2] +// CHECK-FIX: 2c004: 0c 90 00 14 b #147504 +// CHECK-NOFIX: 2c004: 84 0b 00 f9 str x4, [x28, #16] +// CHECK: 2c008: c0 03 5f d6 ret .section .text.06, "ax", %progbits .balign 4096 .globl t3_ffc_strsimd @@ -97,6 +137,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2DFF8 in unpatched output. +// CHECK: t3_ff8_ldrunpriv: +// CHECK-NEXT: 2dff8: 9d 01 00 f0 adrp x29, #208896 +// CHECK-NEXT: 2dffc: 41 08 40 38 ldtrb w1, [x2] +// CHECK-FIX: 2e000: 0f 88 00 14 b #139324 +// CHECK-NOFIX: 2e000: bd 03 40 f9 ldr x29, [x29] +// CHECK: 2e004: c0 03 5f d6 ret .section .text.07, "ax", %progbits .balign 4096 .globl t3_ff8_ldrunpriv @@ -109,6 +155,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2FFFC in unpatched output. 
+// CHECK: t3_ffc_ldur: +// CHECK-NEXT: 2fffc: 9d 01 00 b0 adrp x29, #200704 +// CHECK-NEXT: 30000: 42 40 40 b8 ldur w2, [x2, #4] +// CHECK-FIX: 30004: 10 80 00 14 b #131136 +// CHECK-NOFIX: 30004: bd 07 40 f9 ldr x29, [x29, #8] +// CHECK: 30008: c0 03 5f d6 ret .section .text.08, "ax", %progbits .balign 4096 .globl t3_ffc_ldur @@ -121,6 +173,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 31FFC in unpatched output. +// CHECK: t3_ffc_sturh: +// CHECK-NEXT: 31ffc: 72 01 00 f0 adrp x18, #192512 +// CHECK-NEXT: 32000: 43 40 00 78 sturh w3, [x2, #4] +// CHECK-FIX: 32004: 12 78 00 14 b #122952 +// CHECK-NOFIX: 32004: 41 0a 40 f9 ldr x1, [x18, #16] +// CHECK: 32008: c0 03 5f d6 ret .section .text.09, "ax", %progbits .balign 4096 .globl t3_ffc_sturh @@ -133,6 +191,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 33FF8 in unpatched output. +// CHECK: t3_ff8_literal: +// CHECK-NEXT: 33ff8: 72 01 00 b0 adrp x18, #184320 +// CHECK-NEXT: 33ffc: e3 ff ff 58 ldr x3, #-4 +// CHECK-FIX: 34000: 15 70 00 14 b #114772 +// CHECK-NOFIX: 34000: 52 02 40 f9 ldr x18, [x18] +// CHECK: 34004: c0 03 5f d6 ret .section .text.10, "ax", %progbits .balign 4096 .globl t3_ff8_literal @@ -145,6 +209,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 35FFC in unpatched output. +// CHECK: t3_ffc_register: +// CHECK-NEXT: 35ffc: 4f 01 00 f0 adrp x15, #176128 +// CHECK-NEXT: 36000: 43 68 61 f8 ldr x3, [x2, x1] +// CHECK-FIX: 36004: 16 68 00 14 b #106584 +// CHECK-NOFIX: 36004: ea 05 40 f9 ldr x10, [x15, #8] +// CHECK: 36008: c0 03 5f d6 ret .section .text.11, "ax", %progbits .balign 4096 .globl t3_ffc_register @@ -157,6 +227,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 37FF8 in unpatched output. 
+// CHECK: t3_ff8_stp: +// CHECK-NEXT: 37ff8: 50 01 00 b0 adrp x16, #167936 +// CHECK-NEXT: 37ffc: 61 08 00 a9 stp x1, x2, [x3] +// CHECK-FIX: 38000: 19 60 00 14 b #98404 +// CHECK-NOFIX: 38000: 0d 0a 40 f9 ldr x13, [x16, #16] +// CHECK: 38004: c0 03 5f d6 ret .section .text.12, "ax", %progbits .balign 4096 .globl t3_ff8_stp @@ -169,6 +245,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 39FFC in unpatched output. +// CHECK: t3_ffc_stnp: +// CHECK-NEXT: 39ffc: 27 01 00 f0 adrp x7, #159744 +// CHECK-NEXT: 3a000: 61 08 00 a8 stnp x1, x2, [x3] +// CHECK-FIX: 3a004: 1a 58 00 14 b #90216 +// CHECK-NOFIX: 3a004: e9 00 40 f9 ldr x9, [x7] +// CHECK: 3a008: c0 03 5f d6 ret .section .text.13, "ax", %progbits .balign 4096 .globl t3_ffc_stnp @@ -181,6 +263,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3BFFC in unpatched output. +// CHECK: t3_ffc_st1singlepost: +// CHECK-NEXT: 3bffc: 37 01 00 b0 adrp x23, #151552 +// CHECK-NEXT: 3c000: 20 70 82 4c st1 { v0.16b }, [x1], x2 +// CHECK-FIX: 3c004: 1c 50 00 14 b #82032 +// CHECK-NOFIX: 3c004: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK: 3c008: c0 03 5f d6 ret .section .text.14, "ax", %progbits .balign 4096 .globl t3_ffc_st1singlepost @@ -193,6 +281,12 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3DFF8 in unpatched output. +// CHECK: t3_ff8_st1multiple: +// CHECK-NEXT: 3dff8: 17 01 00 f0 adrp x23, #143360 +// CHECK-NEXT: 3dffc: 20 a0 00 4c st1 { v0.16b, v1.16b }, [x1] +// CHECK-FIX: 3e000: 1f 48 00 14 b #73852 +// CHECK-NOFIX: 3e000: f8 0a 40 f9 ldr x24, [x23, #16] +// CHECK: 3e004: c0 03 5f d6 ret .section .text.15, "ax", %progbits .balign 4096 .globl t3_ff8_st1multiple @@ -205,6 +299,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3FFF8 in unpatched output. 
+// CHECK: t4_ff8_ldr: +// CHECK-NEXT: 3fff8: 00 01 00 b0 adrp x0, #135168 +// CHECK-NEXT: 3fffc: 21 00 40 f9 ldr x1, [x1] +// CHECK-NEXT: 40000: 42 00 00 8b add x2, x2, x0 +// CHECK-FIX: 40004: 20 40 00 14 b #65664 +// CHECK-NOFIX: 40004: 02 00 40 f9 ldr x2, [x0] +// CHECK: 40008: c0 03 5f d6 ret .section .text.16, "ax", %progbits .balign 4096 .globl t4_ff8_ldr @@ -218,6 +319,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 41FFC in unpatched output. +// CHECK: t4_ffc_str: +// CHECK-NEXT: 41ffc: fc 00 00 f0 adrp x28, #126976 +// CHECK-NEXT: 42000: 42 00 00 f9 str x2, [x2] +// CHECK-NEXT: 42004: 20 00 02 cb sub x0, x1, x2 +// CHECK-FIX: 42008: 21 38 00 14 b #57476 +// CHECK-NOFIX: 42008: 9b 07 00 f9 str x27, [x28, #8] +// CHECK: 4200c: c0 03 5f d6 ret .section .text.17, "ax", %progbits .balign 4096 .globl t4_ffc_str @@ -231,6 +339,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 43FF8 in unpatched output. +// CHECK: t4_ff8_stp: +// CHECK-NEXT: 43ff8: f0 00 00 b0 adrp x16, #118784 +// CHECK-NEXT: 43ffc: 61 08 00 a9 stp x1, x2, [x3] +// CHECK-NEXT: 44000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 44004: 24 30 00 14 b #49296 +// CHECK-NOFIX: 44004: 0e 0a 40 f9 ldr x14, [x16, #16] +// CHECK: 44008: c0 03 5f d6 ret .section .text.18, "ax", %progbits .balign 4096 .globl t4_ff8_stp @@ -244,6 +359,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 45FF8 in unpatched output. +// CHECK: t4_ff8_stppre: +// CHECK-NEXT: 45ff8: d0 00 00 f0 adrp x16, #110592 +// CHECK-NEXT: 45ffc: 61 08 81 a9 stp x1, x2, [x3, #16]! 
+// CHECK-NEXT: 46000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 46004: 26 28 00 14 b #41112 +// CHECK-NOFIX: 46004: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK: 46008: c0 03 5f d6 ret .section .text.19, "ax", %progbits .balign 4096 .globl t4_ff8_stppre @@ -257,6 +379,14 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 47FF8 in unpatched output. +// CHECK: t4_ff8_stppost: +// CHECK-NEXT: 47ff8: d0 00 00 b0 adrp x16, #102400 +// CHECK-NEXT: 47ffc: 61 08 81 a8 stp x1, x2, [x3], #16 +// CHECK-NEXT: 48000: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 48004: 28 20 00 14 b #32928 +// CHECK-NOFIX: 48004: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK: 48008: c0 03 5f d6 ret + .section .text.20, "ax", %progbits .balign 4096 .globl t4_ff8_stppost @@ -270,6 +400,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 49FFC in unpatched output. +// CHECK: t4_ffc_stpsimd: +// CHECK-NEXT: 49ffc: b0 00 00 f0 adrp x16, #94208 +// CHECK-NEXT: 4a000: 61 08 00 ad stp q1, q2, [x3] +// CHECK-NEXT: 4a004: 03 7e 10 9b mul x3, x16, x16 +// CHECK-FIX: 4a008: 29 18 00 14 b #24740 +// CHECK-NOFIX: 4a008: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK: 4a00c: c0 03 5f d6 ret .section .text.21, "ax", %progbits .balign 4096 .globl t4_ffc_stpsimd @@ -283,6 +420,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4BFFC in unpatched output. +// CHECK: t4_ffc_stnp: +// CHECK-NEXT: 4bffc: a7 00 00 b0 adrp x7, #86016 +// CHECK-NEXT: 4c000: 61 08 00 a8 stnp x1, x2, [x3] +// CHECK-NEXT: 4c004: 1f 20 03 d5 nop +// CHECK-FIX: 4c008: 2b 10 00 14 b #16556 +// CHECK-NOFIX: 4c008: ea 00 40 f9 ldr x10, [x7] +// CHECK: 4c00c: c0 03 5f d6 ret .section .text.22, "ax", %progbits .balign 4096 .globl t4_ffc_stnp @@ -296,6 +440,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4DFFC in unpatched output. 
+// CHECK: t4_ffc_st1: +// CHECK-NEXT: 4dffc: 98 00 00 f0 adrp x24, #77824 +// CHECK-NEXT: 4e000: 20 70 00 4c st1 { v0.16b }, [x1] +// CHECK-NEXT: 4e004: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK-FIX: 4e008: 2d 08 00 14 b #8372 +// CHECK-NOFIX: 4e008: 18 ff 3f f9 str x24, [x24, #32760] +// CHECK: 4e00c: c0 03 5f d6 ret .section .text.23, "ax", %progbits .balign 4096 .globl t4_ffc_st1 @@ -309,6 +460,13 @@ ret // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4FFF8 in unpatched output. +// CHECK: t3_ff8_ldr_once: +// CHECK-NEXT: 4fff8: 80 00 00 b0 adrp x0, #69632 +// CHECK-NEXT: 4fffc: 20 70 82 4c st1 { v0.16b }, [x1], x2 +// CHECK-FIX: 50000: 31 00 00 14 b #196 +// CHECK-NOFIX: 50000: 01 08 40 f9 ldr x1, [x0, #16] +// CHECK: 50004: 02 08 40 f9 ldr x2, [x0, #16] +// CHECK-NEXT: 50008: c0 03 5f d6 ret .section .text.24, "ax", %progbits .balign 4096 .globl t3_ff8_ldr_once @@ -327,6 +485,80 @@ _start: ret +// CHECK-FIX: __CortexA53843419_22000: +// CHECK-FIX-NEXT: 5000c: 00 00 40 f9 ldr x0, [x0] +// CHECK-FIX-NEXT: 50010: fd 47 ff 17 b #-188428 +// CHECK-FIX: __CortexA53843419_24000: +// CHECK-FIX-NEXT: 50014: 02 04 40 f9 ldr x2, [x0, #8] +// CHECK-FIX-NEXT: 50018: fb 4f ff 17 b #-180244 +// CHECK-FIX: __CortexA53843419_26004: +// CHECK-FIX-NEXT: 5001c: 03 08 40 f9 ldr x3, [x0, #16] +// CHECK-FIX-NEXT: 50020: fa 57 ff 17 b #-172056 +// CHECK-FIX: __CortexA53843419_28000: +// CHECK-FIX-NEXT: 50024: 02 00 40 f9 ldr x2, [x0] +// CHECK-FIX-NEXT: 50028: f7 5f ff 17 b #-163876 +// CHECK-FIX: __CortexA53843419_2A004: +// CHECK-FIX-NEXT: 5002c: 9c 07 00 f9 str x28, [x28, #8] +// CHECK-FIX-NEXT: 50030: f6 67 ff 17 b #-155688 +// CHECK-FIX: __CortexA53843419_2C004: +// CHECK-FIX-NEXT: 50034: 84 0b 00 f9 str x4, [x28, #16] +// CHECK-FIX-NEXT: 50038: f4 6f ff 17 b #-147504 +// CHECK-FIX: __CortexA53843419_2E000: +// CHECK-FIX-NEXT: 5003c: bd 03 40 f9 ldr x29, [x29] +// CHECK-FIX-NEXT: 50040: f1 77 ff 17 b #-139324 +// CHECK-FIX: __CortexA53843419_30004: +// 
CHECK-FIX-NEXT: 50044: bd 07 40 f9 ldr x29, [x29, #8] +// CHECK-FIX-NEXT: 50048: f0 7f ff 17 b #-131136 +// CHECK-FIX: __CortexA53843419_32004: +// CHECK-FIX-NEXT: 5004c: 41 0a 40 f9 ldr x1, [x18, #16] +// CHECK-FIX-NEXT: 50050: ee 87 ff 17 b #-122952 +// CHECK-FIX: __CortexA53843419_34000: +// CHECK-FIX-NEXT: 50054: 52 02 40 f9 ldr x18, [x18] +// CHECK-FIX-NEXT: 50058: eb 8f ff 17 b #-114772 +// CHECK-FIX: __CortexA53843419_36004: +// CHECK-FIX-NEXT: 5005c: ea 05 40 f9 ldr x10, [x15, #8] +// CHECK-FIX-NEXT: 50060: ea 97 ff 17 b #-106584 +// CHECK-FIX: __CortexA53843419_38000: +// CHECK-FIX-NEXT: 50064: 0d 0a 40 f9 ldr x13, [x16, #16] +// CHECK-FIX-NEXT: 50068: e7 9f ff 17 b #-98404 +// CHECK-FIX: __CortexA53843419_3A004: +// CHECK-FIX-NEXT: 5006c: e9 00 40 f9 ldr x9, [x7] +// CHECK-FIX-NEXT: 50070: e6 a7 ff 17 b #-90216 +// CHECK-FIX: __CortexA53843419_3C004: +// CHECK-FIX-NEXT: 50074: f6 06 40 f9 ldr x22, [x23, #8] +// CHECK-FIX-NEXT: 50078: e4 af ff 17 b #-82032 +// CHECK-FIX: __CortexA53843419_3E000: +// CHECK-FIX-NEXT: 5007c: f8 0a 40 f9 ldr x24, [x23, #16] +// CHECK-FIX-NEXT: 50080: e1 b7 ff 17 b #-73852 +// CHECK-FIX: __CortexA53843419_40004: +// CHECK-FIX-NEXT: 50084: 02 00 40 f9 ldr x2, [x0] +// CHECK-FIX-NEXT: 50088: e0 bf ff 17 b #-65664 +// CHECK-FIX: __CortexA53843419_42008: +// CHECK-FIX-NEXT: 5008c: 9b 07 00 f9 str x27, [x28, #8] +// CHECK-FIX-NEXT: 50090: df c7 ff 17 b #-57476 +// CHECK-FIX: __CortexA53843419_44004: +// CHECK-FIX-NEXT: 50094: 0e 0a 40 f9 ldr x14, [x16, #16] +// CHECK-FIX-NEXT: 50098: dc cf ff 17 b #-49296 +// CHECK-FIX: __CortexA53843419_46004: +// CHECK-FIX-NEXT: 5009c: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500a0: da d7 ff 17 b #-41112 +// CHECK-FIX: __CortexA53843419_48004: +// CHECK-FIX-NEXT: 500a4: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500a8: d8 df ff 17 b #-32928 +// CHECK-FIX: __CortexA53843419_4A008: +// CHECK-FIX-NEXT: 500ac: 0e 06 40 f9 ldr x14, [x16, #8] +// CHECK-FIX-NEXT: 500b0: d7 e7 ff 17 b 
#-24740 +// CHECK-FIX: __CortexA53843419_4C008: +// CHECK-FIX-NEXT: 500b4: ea 00 40 f9 ldr x10, [x7] +// CHECK-FIX-NEXT: 500b8: d5 ef ff 17 b #-16556 +// CHECK-FIX: __CortexA53843419_4E008: +// CHECK-FIX-NEXT: 500bc: 18 ff 3f f9 str x24, [x24, #32760] +// CHECK-FIX-NEXT: 500c0: d3 f7 ff 17 b #-8372 +// CHECK-FIX: __CortexA53843419_50000: +// CHECK-FIX-NEXT: 500c4: 01 08 40 f9 ldr x1, [x0, #16] +// CHECK-FIX-NEXT: 500c8: cf ff ff 17 b #-196 + + .data .globl dat .globl dat2