Index: ELF/AArch64ErrataFix.h =================================================================== --- /dev/null +++ ELF/AArch64ErrataFix.h @@ -0,0 +1,30 @@ +//===- AArch64ErrataFix.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_AARCH64ERRATAFIX_H +#define LLD_ELF_AARCH64ERRATAFIX_H + +#include "lld/Common/LLVM.h" + +namespace lld { +namespace elf { + +class OutputSection; + +// Implementation of the -fix-cortex-a53-843419 option, for the erratum that +// affects early revisions of the Cortex-A53 in the AArch64 execution state. + +// FIXME: Only detects and reports the presence of the instruction sequence that +// can trigger the erratum 843419. +void createA53Errata843419Fixes(ArrayRef<OutputSection *> OutputSections); + +} // namespace elf +} // namespace lld + +#endif Index: ELF/AArch64ErrataFix.cpp =================================================================== --- /dev/null +++ ELF/AArch64ErrataFix.cpp @@ -0,0 +1,489 @@ +//===- AArch64ErrataFix.cpp -----------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64ErrataFix.h" +#include "Config.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "Relocations.h" +#include "Strings.h" +#include "SyntheticSections.h" +#include "Target.h" + +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; + +using namespace lld; +using namespace lld::elf; + +// This file implements Section Patching for the purpose of working around +// errata in CPUs. The general principle is that an erratum sequence of one or +// more instructions is detected in the instruction stream, one of the +// instructions in the sequence is replaced with a branch to a patch sequence +// of replacement instructions. At the end of the replacement sequence the +// patch branches back to the instruction stream. + +// This technique is only suitable for fixing an erratum when: +// - There is a set of necessary conditions required to trigger the erratum that +// can be detected at static link time. +// - There is a set of replacement instructions that can be used to remove at +// least one of the necessary conditions that trigger the erratum. +// - We can overwrite an instruction in the erratum sequence with a branch to +// the replacement sequence. +// - We can place the replacement sequence within range of the branch. + +// FIXME: +// - At this stage the implementation only supports detection and not fixing, +// this is sufficient to test the decode and recognition of the erratum +// sequence. +// - The implementation here only supports one patch, the AArch64 Cortex-A53 +// erratum 843419 that affects r0p0, r0p1, r0p2 and r0p4 versions of the core. +// To keep the initial version simple there is no support for multiple +// architectures or selection of different patches. 
+ +// ADRP +// | 1 | immlo (2) | 1 | 0 0 0 0 | immhi (19) | Rd (5) | +static bool isADRP(uint32_t Instr) { + return (Instr & 0x9f000000) == 0x90000000; +} + +// Load and store bit patterns from ARMv8-A ARM ARM. +// Instructions appear in order of appearance starting from table in +// C4.1.3 Loads and Stores. + +// All loads and stores have 1 (at bit position 27), (0 at bit position 25). +// | op0 x op1 (2) | 1 op2 0 op3 (2) | x | op4 (5) | xxxx | op5 (2) | x (10) | +static bool isLoadStoreClass(uint32_t Instr) { + return (Instr & 0x0a000000) == 0x08000000; +} + +// LDN/STN multiple no offset +// | 0 Q 00 | 1100 | 0 L 00 | 0000 | opcode (4) | size (2) | Rn (5) | Rt (5) | +// LDN/STN multiple post-indexed +// | 0 Q 00 | 1100 | 1 L 0 | Rm (5)| opcode (4) | size (2) | Rn (5) | Rt (5) | +// L == 0 for stores. + +// Utility routine to decode opcode field of LDN/STN multiple structure +// instructions to find the ST1 instructions. +// opcode == 0010 ST1 4 registers. +// opcode == 0110 ST1 3 registers. +// opcode == 0111 ST1 1 register. +// opcode == 1010 ST1 2 registers. +static bool isST1MultipleOpcode(uint32_t Instr) { + return (Instr & 0x0000f000) == 0x00002000 || + (Instr & 0x0000f000) == 0x00006000 || + (Instr & 0x0000f000) == 0x00007000 || + (Instr & 0x0000f000) == 0x0000a000; +} + +static bool isST1Multiple(uint32_t Instr) { + return (Instr & 0xbfff0000) == 0x0c000000 && isST1MultipleOpcode(Instr); +} + +// Writes to Rn (writeback). +static bool isST1MultiplePost(uint32_t Instr) { + return (Instr & 0xbfe00000) == 0x0c800000 && isST1MultipleOpcode(Instr); +} + +// LDN/STN single no offset +// | 0 Q 00 | 1101 | 0 L R 0 | 0000 | opc (3) S | size (2) | Rn (5) | Rt (5)| +// LDN/STN single post-indexed +// | 0 Q 00 | 1101 | 1 L R | Rm (5) | opc (3) S | size (2) | Rn (5) | Rt (5)| +// L == 0 for stores + +// Utility routine to decode opcode field of LDN/STN single structure +// instructions to find the ST1 instructions. +// R == 0 for ST1 and ST3, R == 1 for ST2 and ST4. 
+// opcode == 000 ST1 8-bit. +// opcode == 010 ST1 16-bit. +// opcode == 100 ST1 32 or 64-bit (Size determines which). +// The mask also tests bit 22 (L), which must be 0 for a store; R (bit 21) is +// tested by the masks in the callers below. +static bool isST1SingleOpcode(uint32_t Instr) { + return (Instr & 0x0040e000) == 0x00000000 || + (Instr & 0x0040e000) == 0x00004000 || + (Instr & 0x0040e000) == 0x00008000; +} + +// No offset form (no writeback). +static bool isST1Single(uint32_t Instr) { + return (Instr & 0xbfff0000) == 0x0d000000 && isST1SingleOpcode(Instr); +} + +// Writes to Rn (writeback). +static bool isST1SinglePost(uint32_t Instr) { + return (Instr & 0xbfe00000) == 0x0d800000 && isST1SingleOpcode(Instr); +} + +// Any of the four ST1 forms decoded above. +static bool isST1(uint32_t Instr) { + return isST1Multiple(Instr) || isST1MultiplePost(Instr) || + isST1Single(Instr) || isST1SinglePost(Instr); +} + +// Load/store exclusive +// | size (2) 00 | 1000 | o2 L o1 | Rs (5) | o0 | Rt2 (5) | Rn (5) | Rt (5) | +// L == 0 for Stores. +static bool isLoadStoreExclusive(uint32_t Instr) { + return (Instr & 0x3f000000) == 0x08000000; +} + +// As isLoadStoreExclusive, but additionally requires L (bit 22) == 1. +static bool isLoadExclusive(uint32_t Instr) { + return (Instr & 0x3f400000) == 0x08400000; +} + +// Load register literal +// | opc (2) 01 | 1 V 00 | imm19 | Rt (5) | +static bool isLoadLiteral(uint32_t Instr) { + return (Instr & 0x3b000000) == 0x18000000; +} + +// Load/store no-allocate pair +// (offset) +// | opc (2) 10 | 1 V 00 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | +// L == 0 for stores. +// Never writes to a register. +static bool isSTNP(uint32_t Instr) { + return (Instr & 0x3bc00000) == 0x28000000; +} + +// Load/store register pair +// (post-indexed) +// | opc (2) 10 | 1 V 00 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | +// L == 0 for stores, V == 0 for Scalar, V == 1 for Simd/FP +// Writes to Rn. 
+static bool isSTPPost(uint32_t Instr) { + return (Instr & 0x3bc00000) == 0x28800000; +} + +// (offset) +// | opc (2) 10 | 1 V 01 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | +static bool isSTPOffset(uint32_t Instr) { + return (Instr & 0x3bc00000) == 0x29000000; +} + +// (pre-index) +// | opc (2) 10 | 1 V 01 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | +// Writes to Rn. +static bool isSTPPre(uint32_t Instr) { + return (Instr & 0x3bc00000) == 0x29800000; +} + +// Any of the three STP addressing forms decoded above. +static bool isSTP(uint32_t Instr) { + return isSTPPost(Instr) || isSTPOffset(Instr) || isSTPPre(Instr); +} + +// Load/store register (unscaled immediate) +// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 00 | Rn (5) | Rt (5) | +// V == 0 for Scalar, V == 1 for Simd/FP. +// NOTE(review): unlike the sibling predicates below (mask 0x3b200c00), this +// mask does not test bit 21, the 0 that follows opc - confirm intentional. +static bool isLoadStoreUnscaled(uint32_t Instr) { + return (Instr & 0x3b000c00) == 0x38000000; +} + +// Load/store register (immediate post-indexed) +// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 01 | Rn (5) | Rt (5) | +static bool isLoadStoreImmediatePost(uint32_t Instr) { + return (Instr & 0x3b200c00) == 0x38000400; +} + +// Load/store register (unprivileged) +// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 10 | Rn (5) | Rt (5) | +static bool isLoadStoreUnpriv(uint32_t Instr) { + return (Instr & 0x3b200c00) == 0x38000800; +} + +// Load/store register (immediate pre-indexed) +// | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 11 | Rn (5) | Rt (5) | +static bool isLoadStoreImmediatePre(uint32_t Instr) { + return (Instr & 0x3b200c00) == 0x38000c00; +} + +// Load/store register (register offset) +// | size (2) 11 | 1 V 00 | opc (2) 1 | Rm (5) | option (3) S | 10 | Rn | Rt | +static bool isLoadStoreRegisterOff(uint32_t Instr) { + return (Instr & 0x3b200c00) == 0x38200800; +} + +// Load/store register (unsigned immediate) +// | size (2) 11 | 1 V 01 | opc (2) | imm12 | Rn (5) | Rt (5) | +static bool isLoadStoreRegisterUnsigned(uint32_t Instr) { + return (Instr & 0x3b000000) == 0x39000000; +} + +// Rt is always in bit position 0 - 4. 
+static uint32_t getRt(uint32_t Instr) { return (Instr & 0x1f); } + +// Rn is always in bit position 5 - 9. +static uint32_t getRn(uint32_t Instr) { return (Instr >> 5) & 0x1f; } + +// C4.1.2 Branches, Exception Generating and System instructions +// | op0 (3) 1 | 01 op1 (4) | x (22) | +// op0 == 010 101 op1 == 0xxx Conditional Branch. +// op0 == 110 101 op1 == 1xxx Unconditional Branch Register. +// op0 == x00 101 op1 == xxxx Unconditional Branch immediate. +// op0 == x01 101 op1 == 0xxx Compare and branch immediate. +// op0 == x01 101 op1 == 1xxx Test and branch immediate. +static bool isBranch(uint32_t Instr) { + return ((Instr & 0xfe000000) == 0xd6000000) || // Uncond branch reg. + ((Instr & 0xfe000000) == 0x54000000) || // Cond branch. + ((Instr & 0x7c000000) == 0x14000000) || // Uncond branch imm. + ((Instr & 0x7c000000) == 0x34000000); // Compare and test branch. +} + +static bool isV8SingleRegisterNonStructureLoadStore(uint32_t Instr) { + return isLoadStoreUnscaled(Instr) || isLoadStoreImmediatePost(Instr) || + isLoadStoreUnpriv(Instr) || isLoadStoreImmediatePre(Instr) || + isLoadStoreRegisterOff(Instr) || isLoadStoreRegisterUnsigned(Instr); +} + +// Note that this function refers to v8.0 only and does not include the +// additional load and store instructions added in later revisions of +// the architecture such as the Atomic memory operations introduced +// in v8.1. +static bool isV8NonStructureLoad(uint32_t Instr) { + if (isLoadExclusive(Instr)) + return true; + if (isLoadLiteral(Instr)) + return true; + if (isV8SingleRegisterNonStructureLoadStore(Instr)) { + // For Load and Store single register, Loads are derived from a + // combination of the Size, V and Opc fields. + uint32_t Size = (Instr >> 30) & 0x3; + uint32_t V = (Instr >> 26) & 0x1; + uint32_t Opc = (Instr >> 22) & 0x3; + // For the load and store instructions that we are decoding. + // Opc == 0 are all stores. + // Opc != 0 are, with a couple of exceptions, loads. 
The exceptions are: + // Size == 00 (0), V == 1, Opc == 10 (2) which is a store and + // Size == 11 (3), V == 0, Opc == 10 (2) which is a prefetch. + return Opc != 0 && !(Size == 0 && V == 1 && Opc == 2) && + !(Size == 3 && V == 0 && Opc == 2); + } + return false; +} + +// The following decode instructions are only complete up to the instructions +// needed for errata 843419. + +// Instruction with writeback updates the index register after the load/store. +static bool hasWriteback(uint32_t Instr) { + return isLoadStoreImmediatePre(Instr) || + isLoadStoreImmediatePost(Instr) || isSTPPre(Instr) || + isSTPPost(Instr) || isST1SinglePost(Instr) || + isST1MultiplePost(Instr); +} + +// For the load and store class of instructions, a load can write to the +// destination register, a load and a store can write to the base register when +// the instruction has writeback. +static bool LoadStoreWritesToReg(uint32_t Instr, uint32_t Reg) { + return (isV8NonStructureLoad(Instr) && getRt(Instr) == Reg) || + (hasWriteback(Instr) && getRn(Instr) == Reg); +} + +// Scanner for Cortex-A53 errata 843419 +// Full details are available in the Cortex A53 MPCore revision 0 Software +// Developers Errata Notice (ARM-EPM-048406). +// +// The instruction sequence that triggers the erratum is common in compiled +// AArch64 code, however it is sensitive to the offset of the sequence within +// a 4k page. This means that by scanning and fixing the patch after we have +// assigned addresses we only need to disassemble and fix instances of the +// sequence in the range of affected offsets. +// +// In summary the erratum conditions are a series of 4 instructions: +// 1.) An ADRP instruction that writes to register Rn with low 12 bits of +// address of instruction either 0xff8 or 0xffc. +// 2.) A load or store instruction that can be: +// - A single register load or store, of either integer or vector registers. +// - An STP or STNP, of either integer or vector registers. 
+// - An Advanced SIMD ST1 store instruction. +// - Must not write to Rn, but may optionally read from it. +// 3.) An optional instruction that is not a branch and does not write to Rn. +// 4.) A load or store from the Load/store register (unsigned immediate) class +// that uses Rn as the base address register. +// +// Note that we do not attempt to scan for Sequence 2 as described in the +// Software Developers Errata Notice as this has been assessed to be extremely +// unlikely to occur in compiled code. This matches gold and ld.bfd behavior. + +// Return true if the Instruction sequence Instr1, Instr2, and Instr4 match +// the erratum sequence. Instr1, Instr2 and Instr4 correspond to 1.), 2.), +// and 4.) in the Scanner for Cortex-A53 errata comment above. +static bool is843419ErratumSequence(uint32_t Instr1, uint32_t Instr2, + uint32_t Instr4) { + if (!isADRP(Instr1)) + return false; + + uint32_t Rn = getRt(Instr1); // The ADRP destination (Rd) is in bits 0 - 4. + return isLoadStoreClass(Instr2) && + (isLoadStoreExclusive(Instr2) || isLoadLiteral(Instr2) || + isV8SingleRegisterNonStructureLoadStore(Instr2) || + isSTP(Instr2) || isSTNP(Instr2) || isST1(Instr2)) && + !LoadStoreWritesToReg(Instr2, Rn) && + isLoadStoreRegisterUnsigned(Instr4) && getRn(Instr4) == Rn; +} + +// Emit a message naming AdrpAddr (in hex), but only if Config->Verbose is set. +static void report843419Fix(uint64_t AdrpAddr) { + if (!Config->Verbose) + return; + message("detected cortex-a53-843419 erratum sequence starting at " + + utohexstr(AdrpAddr) + " in unpatched output."); +} + +// Scan the instruction sequence starting at Offset Off from the base of +// InputSection IS. We update Off in this function rather than in the caller as +// we can skip ahead much further into the section when we know how many +// instructions we've scanned. +// Return the offset of the load or store instruction in IS that we want to +// patch or 0 if no patch required. 
+static uint64_t scanCortexA53Errata843419(InputSection *IS, uint64_t &Off, + uint64_t Limit) { + uint64_t ISAddr = IS->getParent()->Addr + IS->OutSecOff; + const uint8_t *Buf = IS->Data.begin(); + + // Advance Off so that (ISAddr + Off) modulo 0x1000 is at least 0xff8. + uint64_t InitialPageOff = (ISAddr + Off) & 0xfff; + if (InitialPageOff < 0xff8) + Off += 0xff8 - InitialPageOff; + + if (Off >= Limit || Limit - Off < 12) { + // Need at least 3 4-byte sized instructions to trigger erratum. + Off = Limit; + return 0; + } + // Only read the optional 4th instruction when all 16 bytes are below Limit. + bool OptionalAllowed = Limit - Off >= 16; + uint64_t PatchOff = 0; + uint32_t Instr1 = read32le(Buf + Off); // A64 instructions are little-endian. + uint32_t Instr2 = read32le(Buf + Off + 4); + uint32_t Instr3 = read32le(Buf + Off + 8); + if (is843419ErratumSequence(Instr1, Instr2, Instr3)) + PatchOff = Off + 8; + else if (OptionalAllowed && !isBranch(Instr3)) { + uint32_t Instr4 = read32le(Buf + Off + 12); + if (is843419ErratumSequence(Instr1, Instr2, Instr4)) + PatchOff = Off + 12; + } + if (((ISAddr + Off) & 0xfff) == 0xff8) + Off += 4; + else + Off += 0xffc; + return PatchOff; +} + +// The AArch64 ABI permits data in executable sections. We must avoid scanning +// this data as if it were instructions to avoid false matches. +// The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe +// half open intervals [Symbol Value, Next Symbol Value) of code and data +// within sections. If there is no next symbol then the half open interval is +// [Symbol Value, End of section). The type, code or data, is determined by the +// mapping symbol name, $x for code, $d for data. +std::map<InputSection *, std::vector<const Defined *>> static makeAArch64SectionMap() { + std::map<InputSection *, std::vector<const Defined *>> SectionMap; + auto IsCodeMapSymbol = [](const Symbol *B) { + return B->getName() == "$x" || B->getName().startswith("$x."); + }; + auto IsDataMapSymbol = [](const Symbol *B) { + return B->getName() == "$d" || B->getName().startswith("$d."); + }; + + // Collect mapping symbols for every executable InputSection. 
+ for (InputFile *File : ObjectFiles) { + auto *F = cast>(File); + for (Symbol *B : F->getLocalSymbols()) { + auto *Def = dyn_cast(B); + if (!Def) + continue; + if (!IsCodeMapSymbol(Def) && !IsDataMapSymbol(Def)) + continue; + if (auto *Sec = dyn_cast(Def->Section)) { + if (Sec->Flags & SHF_EXECINSTR) + SectionMap[Sec].push_back(Def); + } + } + } + // For each InputSection make sure the mapping symbols are in sorted in + // ascending order and free from consecutive runs of mapping symbols with + // the same type. For example we must remove the redundant $d.1 from $x.0 + // $d.0 $d.1 $x.1. + for (auto &KV : SectionMap) { + std::vector &MapSyms = KV.second; + if (MapSyms.size() > 1) { + std::stable_sort(MapSyms.begin(), MapSyms.end(), + [](const Defined *A, const Defined *B) { + return A->Value < B->Value; + }); + MapSyms.erase( + std::unique(MapSyms.begin(), MapSyms.end(), + [=](const Defined *A, const Defined *B) { + return (IsCodeMapSymbol(A) && IsCodeMapSymbol(B)) || + (IsDataMapSymbol(A) && IsDataMapSymbol(B)); + }), + MapSyms.end()); + } + } + return SectionMap; +} + +static void scanInputSectionDescription(std::vector &MapSyms, + InputSection *IS) { + // Use SectionMap to make sure we only scan code and not inline data. + // We have already sorted MapSyms in ascending order and removed + // consecutive mapping symbols of the same type. Our range of + // executable instructions to scan is therefore [CodeSym->Value, + // DataSym->Value) or [CodeSym->Value, section size). + auto CodeSym = llvm::find_if(MapSyms, [&](const Defined *MS) { + return MS->getName().startswith("$x"); + }); + + while (CodeSym != MapSyms.end()) { + auto DataSym = std::next(CodeSym); + uint64_t Off = (*CodeSym)->Value; + uint64_t Limit = + (DataSym == MapSyms.end()) ? 
IS->Data.size() : (*DataSym)->Value; + + while (Off < Limit) { + uint64_t StartAddr = IS->getParent()->Addr + IS->OutSecOff + Off; + if (scanCortexA53Errata843419(IS, Off, Limit)) + report843419Fix(StartAddr); + } + if (DataSym == MapSyms.end()) + break; + CodeSym = std::next(DataSym); + } +} + +// Scan all the executable code in an AArch64 link to detect the Cortex-A53 +// erratum 843419. +// FIXME: The current implementation only scans for the erratum sequence, it +// does not attempt to fix it. +void lld::elf::createA53Errata843419Fixes( + ArrayRef OutputSections) { + std::map> SectionMap = + makeAArch64SectionMap(); + + for (OutputSection *OS : OutputSections) { + if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR)) + continue; + for (BaseCommand *BC : OS->SectionCommands) + if (auto *ISD = dyn_cast(BC)) { + for (InputSection *IS : ISD->Sections) { + // LLD doesn't use the erratum sequence in SyntheticSections. + if (isa(IS)) + continue; + scanInputSectionDescription(SectionMap[IS], IS); + } + } + } +} Index: ELF/CMakeLists.txt =================================================================== --- ELF/CMakeLists.txt +++ ELF/CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_lld_library(lldELF + AArch64ErrataFix.cpp Arch/AArch64.cpp Arch/AMDGPU.cpp Arch/ARM.cpp Index: ELF/Config.h =================================================================== --- ELF/Config.h +++ ELF/Config.h @@ -116,6 +116,8 @@ bool EmitRelocs; bool EnableNewDtags; bool ExportDynamic; + bool FatalWarnings; + bool FixCortexA53Errata843419; bool GcSections; bool GdbIndex; bool GnuHash = false; Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -273,6 +273,9 @@ if (Config->EMachine == EM_MIPS && Config->GnuHash) error("the .gnu.hash section is not compatible with the MIPS target."); + if (Config->FixCortexA53Errata843419 && Config->EMachine != EM_AARCH64) + error("--fix-cortex-a53-843419 is only supported on 
AArch64 targets."); + if (Config->Pie && Config->Shared) error("-shared and -pie may not be used together"); @@ -654,6 +657,7 @@ Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); Config->FilterList = getArgs(Args, OPT_filter); Config->Fini = Args.getLastArgValue(OPT_fini, "_fini"); + Config->FixCortexA53Errata843419 = Args.hasArg(OPT_fix_cortex_a53_843419); Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); Config->GdbIndex = Args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); Config->ICF = Args.hasFlag(OPT_icf_all, OPT_icf_none, false); Index: ELF/Options.td =================================================================== --- ELF/Options.td +++ ELF/Options.td @@ -120,6 +120,9 @@ defm fini: Eq<"fini">, HelpText<"Specify a finalizer function">, MetaVarName<"">; +def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">, + HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">; + def full_shutdown : F<"full-shutdown">, HelpText<"Perform a full shutdown instead of calling _exit">; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Writer.h" +#include "AArch64ErrataFix.h" #include "Config.h" #include "Filesystem.h" #include "LinkerScript.h" @@ -1349,6 +1350,10 @@ Changed |= In::RelaDyn->updateAllocSize(); } while (Changed); } + if (Config->EMachine == EM_AARCH64 && Config->FixCortexA53Errata843419) { + Script->assignAddresses(); + createA53Errata843419Fixes(OutputSections); + } // Fill other section headers. 
The dynamic table is finalized // at the end because some tags like RELSZ depend on result Index: test/ELF/aarch64-cortex-a53-843419-address.s =================================================================== --- /dev/null +++ test/ELF/aarch64-cortex-a53-843419-address.s @@ -0,0 +1,138 @@ +// REQUIRES: aarch64 +// RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o +// RUN: echo "SECTIONS { \ +// RUN: .text : { *(.text) *(.text.*) *(.newisd) } \ +// RUN: .text2 : { *.(newos) } \ +// RUN: .data : { *(.data) } }" > %t.script +// RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 | FileCheck %s + +// Test cases for Cortex-A53 Erratum 843419 that involve interactions +// between the generated patches and the address of sections + +// See ARM-EPM-048406 Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf +// for full erratum details. +// In Summary +// 1.) +// ADRP (0xff8 or 0xffc) +// 2.) +// - load or store single register of either integer or vector registers +// - STP or STNP of either integer or vector registers +// - Advanced SIMD ST1 store instruction +// Must not write Rn +// 3.) optional instruction, can't be a branch, must not write Rn, may read Rn +// 4.) A load or store instruction from the Load/Store register unsigned +// immediate class using Rn as the base register + +// An aarch64 section can contain ranges of literal data embedded within the +// code, these ranges are encoded with mapping symbols. This tests that we +// can match the erratum sequence in code, but not data +// - We can handle more than one patch per code range (denoted by mapping +// symbols) +// - We can handle a patch in more than one range of code, with literal data +// in between +// - We can handle redundant mapping symbols (two or more consecutive mapping +// symbols with the same type) +// - We can ignore erratum sequences in multiple literal data ranges. 
+ +// CHECK: detected cortex-a53-843419 erratum sequence starting at FF8 in unpatched output. + + .section .text.01, "ax", %progbits + .balign 4096 + .space 4096 - 8 + .globl t3_ff8_ldr + .type t3_ff8_ldr, %function +t3_ff8_ldr: + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + + + // create a redundant mapping symbol as we are already in a $x range + // some object producers unconditionally generate a mapping symbol on + // every symbol so we need to handle the case of $x $x + .local $x.999 +$x.999: +// CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 1FFC in unpatched output. + .globl t3_ffc_ldrsimd + .type t3_ffc_ldrsimd, %function + .space 4096 - 12 +t3_ffc_ldrsimd: + adrp x0, dat + ldr s1, [x1, #0] + ldr x2, [x0, :got_lo12:dat] + ret + +// Inline data containing bit pattern of erratum sequence, expect no patch + .globl t3_ffc_ldralldata + .type t3_ff8_ldralldata, %function + .space 4096 - 20 +t3_ff8_ldralldata: + // 0x90000000 = adrp x0, #0 + .byte 0x00 + .byte 0x00 + .byte 0x00 + .byte 0x90 + // 0xf9400021 = ldr x1, [x1] + .byte 0x21 + .byte 0x00 + .byte 0x40 + .byte 0xf9 + // 0xf9400000 = ldr x0, [x0] + .byte 0x00 + .byte 0x00 + .byte 0x40 + .byte 0xf9 + // Check that we can recognise the erratum sequence post literal data + +// CHECK-NEXT: detected cortex-a53-843419 erratum sequence starting at 3FF8 in unpatched output. + + .space 4096 - 12 + .globl t3_ffc_ldr + .type t3_ffc_ldr, %function + t3_ffc_ldr: + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + + .section .text.02, "ax", %progbits + .space 4096 - 12 + + // Start a new InputSectionDescription (see Linker Script) so the + // start address will be + // affected by any patches added to previous InputSectionDescription + +// CHECK: detected cortex-a53-843419 erratum sequence starting at 4FFC in unpatched output. 
+ + .section .newisd, "ax", %progbits + .globl t3_ffc_str + .type t3_ffc_str, %function +t3_ffc_str: + adrp x0, dat + str x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + .space 4096 - 20 + +// CHECK: detected cortex-a53-843419 erratum sequence starting at 5FF8 in unpatched output. + + // Start a new OutputSection (see Linker Script) so the + // start address will be + // affected by any patches added to previous InputSectionDescription + .section .newos, "ax", %progbits + .globl t3_ff8_str + .type t3_ff8_str, %function +t3_ff8_str: + adrp x0, dat + str x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + .globl _start + .type _start, %function +_start: + ret + + .data + .globl dat +dat: .word 0 Index: test/ELF/aarch64-cortex-a53-843419-cli.s =================================================================== --- /dev/null +++ test/ELF/aarch64-cortex-a53-843419-cli.s @@ -0,0 +1,10 @@ +// REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +// RUN: not ld.lld %t -fix-cortex-a53-843419 -o %t2 2>&1 | FileCheck %s + +// CHECK: --fix-cortex-a53-843419 is only supported on AArch64 targets. +.globl entry +.text + .quad 0 +entry: + ret Index: test/ELF/aarch64-cortex-a53-843419-nopatch.s =================================================================== --- /dev/null +++ test/ELF/aarch64-cortex-a53-843419-nopatch.s @@ -0,0 +1,338 @@ +// REQUIRES: aarch64 +// RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o +// RUN: ld.lld -fix-cortex-a53-843419 -verbose -t %t.o -o %t2 | FileCheck %s +// Test cases for Cortex-A53 Erratum 843419 that we don't expect to recognize +// as needing a patch as one or more of the conditions isn't satisfied. +// See ARM-EPM-048406 Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf +// for full erratum details. +// In Summary +// 1.) +// ADRP (0xff8 or 0xffc) +// 2.) 
+// - load or store single register or either integer or vector registers +// - STP or STNP of either vector or vector registers +// - Advanced SIMD ST1 store instruction +// Must not write Rn +// 3.) optional instruction, can't be a branch, must not write Rn, may read Rn +// 4.) A load or store instruction from the Load/Store register unsigned +// immediate class using Rn as the base register + +// Expect no patches detected. +// CHECK-NOT: detected cortex-a53-843419 erratum sequence + +// erratum sequence but adrp (address & 0xfff) is not 0xff8 or 0xffc + .section .text.01, "ax", %progbits + .balign 4096 + .globl t3_0_ldr + .type t3_ff8_ldr, %function +t3_0_ldr: + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + + .section .text.02, "ax", %progbits + .balign 4096 + .globl t3_ff4_ldr + .space 4096 - 12 + .type t3_ff4_ldr, %function +t3_ff4_ldr: + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + ret + +// Close matches for erratum sequence, with adrp at correct address but +// instruction 2 is a load or store but not one that matches the erratum +// conditions, but with a similar encoding to an instruction that does. + + // ldp is not part of sequence, although stp is. + .section .text.03, "ax", %progbits + .balign 4096 + .globl t3_ff8_ldp + .type t3_ff8_ldp, %function + .space 4096 - 8 +t3_ff8_ldp: + adrp x16, dat + ldp x1,x2, [x3, #0] + ldr x13, [x16, :got_lo12:dat] + ret + + // st2 is not part of sequence although st1 is. + .section .text.04, "ax", %progbits + .balign 4096 + .globl t3_ffc_st2 + .type t3_ffc_st2, %function + .space 4096 - 4 +t3_ffc_st2: + adrp x16, dat + st2 { v0.16b, v1.16b }, [x1] + ldr x13, [x16, :got_lo12:dat] + ret + + // st3 is not part of sequence although st1 is. 
+        .section .text.05, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_st3
+        .type t3_ffc_st3, %function
+        .space 4096 - 4  // pad so the adrp sits at offset 0xffc of the 4KiB block
+t3_ffc_st3:
+        adrp x16, dat
+        st3 { v0.16b, v1.16b, v2.16b }, [x1], x2  // st3, not st1
+        ldr x13, [x16, :got_lo12:dat]
+        ret
+
+        // ld1 is not part of sequence although st1 is.
+        .section .text.06, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_ld1
+        .type t3_ffc_ld1, %function
+        .space 4096 - 4
+t3_ffc_ld1:
+        adrp x16, dat
+        ld1 { v0.16b }, [x2], x3
+        ldr x13, [x16, :got_lo12:dat]
+        ret
+
+        // ldnp is not part of sequence although stnp is.
+        .section .text.07, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_ldnp
+        .type t4_ff8_ldnp, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t4_ff8_ldnp:
+        adrp x7, dat
+        ldnp x1,x2, [x3, #0]
+        nop
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+// Close match for erratum sequence, with adrp at correct address but
+// instruction 2 writes to Rn, with Rn as either destination or as the
+// transfer register but with writeback.
+
+        // ldr instruction writes to Rn
+        .section .text.08, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldr
+        .type t3_ff8_ldr, %function
+        .space 4096 - 8
+t3_ff8_ldr:
+        adrp x0, dat
+        ldr x0, [x1, #0]  // overwrites x0, the adrp destination
+        ldr x0, [x0, :got_lo12:dat]
+        ret
+
+        // str instruction writes to Rn via writeback (pre index)
+        .section .text.09, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_str
+        .type t3_ff8_str, %function
+        .space 4096 - 8
+t3_ff8_str:
+        adrp x0, dat
+        str x1, [x0, #4]!  // pre-index writeback updates x0
+        ldr x0, [x0, :got_lo12:dat]
+        ret
+
+        // ldr instruction writes to Rn via writeback (post index)
+        .section .text.09, "ax", %progbits  // NOTE(review): reuses .text.09 from the previous case
+        .balign 4096
+        .globl t3_ffc_ldr
+        .type t3_ffc_ldr, %function
+        .space 4096 - 8  // NOTE(review): symbol says ffc but this pads to 0xff8 - confirm intent
+t3_ffc_ldr:
+        adrp x0, dat
+        ldr x1, [x0], 0x8  // post-index writeback updates x0
+        ldr x0, [x0, :got_lo12:dat]
+        ret
+
+        // stp writes to Rn via writeback (pre index)
+        .section .text.10, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_stppre
+        .type t4_ffc_stppre, %function
+        .space 4096 - 4
+t4_ffc_stppre:
+        adrp x16, dat
+        stp x1,x2, [x16, #16]!  // pre-index writeback updates x16
+        mul x3, x16, x16
+        ldr x14, [x16, #8]
+        ret
+
+        // stp writes to Rn via writeback (post index)
+        .section .text.11, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_stppost
+        .type t4_ff8_stppost, %function
+        .space 4096 - 8
+t4_ff8_stppost:
+        adrp x16, dat
+        stp x1,x2, [x16], #16  // post-index writeback updates x16
+        mul x3, x16, x16
+        ldr x14, [x16, #8]
+        ret
+
+        // st1 writes to Rn via writeback
+        .section .text.12, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_st1
+        .type t3_ff8_st1, %function
+        .space 4096 - 8
+t3_ff8_st1:
+        adrp x16, dat
+        st1 { v0.16b}, [x16], x2  // post-index writeback updates x16
+        ldr x13, [x16, :got_lo12:dat]
+        ret
+
+// Close match for erratum sequence, but with optional instruction 3 a branch
+
+        // function call via immediate
+        .section .text.13, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_blimm
+        .type t4_ffc_blimm, %function
+        .space 4096 - 4
+t4_ffc_blimm:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        bl t4_ffc_blimm
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+        // function call via register
+        .section .text.14, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_blreg
+        .type t4_ffc_blreg, %function
+        .space 4096 - 4
+t4_ffc_blreg:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        blr x4
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+        // Unconditional branch immediate
+        .section .text.15, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_branchimm
+        .type t4_ffc_branchimm, %function
+        .space 4096 - 4
+t4_ffc_branchimm:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        b t4_ffc_branchimm
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+        // Unconditional branch register
+        .section .text.16, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_branchreg
+        .type t4_ffc_branchreg, %function
+        .space 4096 - 4
+t4_ffc_branchreg:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        br x4
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+        // Conditional branch
+        .section .text.17, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_branchcond
+        .type t4_ffc_branchcond, %function
+        .space 4096 - 4
+t4_ffc_branchcond:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        cbz x5, t4_ffc_branchcond
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+        // Conditional branch immediate
+        .section .text.18, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_branchcondimm
+        .type t4_ffc_branchcondimm, %function
+        .space 4096 - 4
+t4_ffc_branchcondimm:
+        adrp x7, dat
+        stnp x1,x2, [x3, #0]
+        beq t4_ffc_branchcondimm
+        ldr x10, [x7, :got_lo12:dat]
+        ret
+
+// Bitpattern matches erratum sequence but either all or part of the sequence
+// is in inline literal data
+        .section .text.19, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldrtraildata
+        .type t3_ff8_ldrtraildata, %function
+        .space 4096 - 8
+t3_ff8_ldrtraildata:
+        adrp x0, dat
+        ldr x1, [x1, #0]
+        // 0xf9400000 = ldr x0, [x0]
+        .byte 0x00
+        .byte 0x00
+        .byte 0x40
+        .byte 0xf9
+        ldr x0, [x0, :got_lo12:dat]
+        ret
+
+        .section .text.20, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldrpredata
+        .type t3_ff8_ldrpredata, %function
+        .space 4096 - 8
+t3_ff8_ldrpredata:
+        // 0x90000000 = adrp x0, #0
+        .byte 0x00
+        .byte 0x00
+        .byte 0x00
+        .byte 0x90
+        ldr x1, [x1, #0]
+        ldr x0, [x0, :got_lo12:dat]
+        ret
+
+        .section .text.21, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldralldata
+        .type t3_ff8_ldralldata, %function
+        .space 4096 - 8
+t3_ff8_ldralldata:
+        // 0x90000000 = adrp x0, #0
+        .byte 0x00
+        .byte 0x00
+        .byte 0x00
+        .byte 0x90
+        // 0xf9400021 = ldr x1, [x1]
+        .byte 0x21
+        .byte 0x00
+        .byte 0x40
+        .byte 0xf9
+        // 0xf9400000 = ldr x0, [x0]
+        .byte 0x00
+        .byte 0x00
+        .byte 0x40
+        .byte 0xf9
+
+        ret
+
+        .text
+        .globl _start
+        .type _start, %function
+_start:
+        ret
+
+
+
+
+
+// Bitpattern matches erratum sequence but section is not executable
+        .data
+        .globl dat
+dat:    .word 0
Index: test/ELF/aarch64-cortex-a53-843419-recognize.s
===================================================================
--- /dev/null
+++ test/ELF/aarch64-cortex-a53-843419-recognize.s
@@ -0,0 +1,336 @@
+// REQUIRES: aarch64
+// RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o
+// RUN: ld.lld -fix-cortex-a53-843419 -verbose %t.o -o %t2 |
FileCheck -check-prefix CHECK-PRINT %s
+
+// Test cases for Cortex-A53 Erratum 843419
+// See ARM-EPM-048406 Cortex_A53_MPCore_Software_Developers_Errata_Notice.pdf
+// for full erratum details.
+// In Summary
+// 1.)
+// ADRP writing Rn, located at page offset 0xff8 or 0xffc
+// 2.)
+// - load or store single register of either integer or vector registers
+// - STP or STNP of either integer or vector registers
+// - Advanced SIMD ST1 store instruction
+// Must not write Rn
+// 3.) optional instruction, can't be a branch, must not write Rn, may read Rn
+// 4.) A load or store instruction from the Load/Store register unsigned
+// immediate class using Rn as the base register
+
+// Each section contains a sequence of instructions that should be recognized
+// as erratum 843419. The test cases cover the major variations such as:
+// adrp placed at page offset 0xff8 or 0xffc
+// Variations in instruction class for instruction 2
+// Optional instruction 3 present or not
+// Load or store for instruction 4.
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21FF8 in unpatched output.
+        .section .text.01, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldr
+        .type t3_ff8_ldr, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t3_ff8_ldr:
+        adrp x0, dat1  // instruction 1: Rn is x0
+        ldr x1, [x1, #0]  // instruction 2: integer load that leaves x0 alone
+        ldr x0, [x0, :got_lo12:dat1]  // final load uses x0 as its base register
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23FF8 in unpatched output.
+        .section .text.02, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldrsimd
+        .type t3_ff8_ldrsimd, %function
+        .space 4096 - 8
+t3_ff8_ldrsimd:
+        adrp x0, dat2
+        ldr s1, [x1, #0]  // instruction 2: load into a SIMD/FP register
+        ldr x2, [x0, :got_lo12:dat2]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 25FFC in unpatched output.
+        .section .text.03, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_ldrpost
+        .type t3_ffc_ldrpost, %function
+        .space 4096 - 4  // pad so the adrp sits at offset 0xffc of the 4KiB block
+t3_ffc_ldrpost:
+        adrp x0, dat3
+        ldr s1, [x1], #8  // post-index writeback goes to x1, not to x0
+        ldr x3, [x0, :got_lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 27FF8 in unpatched output.
+        .section .text.04, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_strpre
+        .type t3_ff8_strpre, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t3_ff8_strpre:
+        adrp x0, dat1
+        str s1, [x1, #8]!  // pre-index writeback goes to x1, not to x0
+        ldr x2, [x0, :lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 29FFC in unpatched output.
+        .section .text.05, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_str
+        .type t3_ffc_str, %function
+        .space 4096 - 4
+t3_ffc_str:
+        adrp x28, dat2
+        str x2, [x2, #0]
+        str x28, [x28, :lo12:dat2]  // final access is a store based on x28
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2BFFC in unpatched output.
+        .section .text.06, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_strsimd
+        .type t3_ffc_strsimd, %function
+        .space 4096 - 4
+t3_ffc_strsimd:
+        adrp x28, dat3
+        str s4, [x2, #0]  // SIMD/FP register store, as the symbol name says
+        str x4, [x28, :lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2DFF8 in unpatched output.
+        .section .text.07, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldrunpriv
+        .type t3_ff8_ldrunpriv, %function
+        .space 4096 - 8
+t3_ff8_ldrunpriv:
+        adrp x29, dat1
+        ldtrb w1, [x2, #0]  // unprivileged byte load
+        ldr x29, [x29, :got_lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 2FFFC in unpatched output.
+        .section .text.08, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_ldur
+        .type t3_ffc_ldur, %function
+        .space 4096 - 4
+t3_ffc_ldur:
+        adrp x29, dat2
+        ldur w2, [x2, #4]  // unscaled-offset load
+        ldr x29, [x29, :got_lo12:dat2]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 31FFC in unpatched output.
+        .section .text.09, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_sturh
+        .type t3_ffc_sturh, %function
+        .space 4096 - 4  // pad so the adrp sits at offset 0xffc of the 4KiB block
+t3_ffc_sturh:
+        adrp x18, dat3
+        sturh w3, [x2, #4]  // instruction 2: unscaled-offset halfword store
+        ldr x1, [x18, :got_lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 33FF8 in unpatched output.
+        .section .text.10, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_literal
+        .type t3_ff8_literal, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t3_ff8_literal:
+        adrp x18, dat1
+        ldr x3, t3_ff8_literal  // instruction 2: PC-relative literal load
+        ldr x18, [x18, :lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 35FFC in unpatched output.
+        .section .text.11, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_register
+        .type t3_ffc_register, %function
+        .space 4096 - 4
+t3_ffc_register:
+        adrp x15, dat2
+        ldr x3, [x2, x1]  // instruction 2: register-offset load
+        ldr x10, [x15, :lo12:dat2]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 37FF8 in unpatched output.
+        .section .text.12, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_stp
+        .type t3_ff8_stp, %function
+        .space 4096 - 8
+t3_ff8_stp:
+        adrp x16, dat3
+        stp x1,x2, [x3, #0]  // instruction 2: store pair of integer registers
+        ldr x13, [x16, :lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 39FFC in unpatched output.
+        .section .text.13, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_stnp
+        .type t3_ffc_stnp, %function
+        .space 4096 - 4
+t3_ffc_stnp:
+        adrp x7, dat1
+        stnp x1,x2, [x3, #0]  // instruction 2: non-temporal store pair
+        ldr x9, [x7, :lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3BFFC in unpatched output.
+        .section .text.14, "ax", %progbits
+        .balign 4096
+        .globl t3_ffc_st1singlepost
+        .type t3_ffc_st1singlepost, %function
+        .space 4096 - 4
+t3_ffc_st1singlepost:
+        adrp x23, dat2
+        st1 { v0.16b }, [x1], x2  // instruction 2: st1 whose writeback goes to x1, not x23
+        ldr x22, [x23, :lo12:dat2]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3DFF8 in unpatched output.
+        .section .text.15, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_st1multiple
+        .type t3_ff8_st1multiple, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t3_ff8_st1multiple:
+        adrp x23, dat3
+        st1 { v0.16b, v1.16b }, [x1]  // instruction 2: st1 of two registers, no writeback
+        ldr x24, [x23, :lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 3FFF8 in unpatched output.
+        .section .text.16, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_ldr
+        .type t4_ff8_ldr, %function
+        .space 4096 - 8
+t4_ff8_ldr:
+        adrp x0, dat1
+        ldr x1, [x1, #0]
+        add x2, x2, x0  // optional instruction 3: reads x0 without writing it
+        ldr x2, [x0, :got_lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 41FFC in unpatched output.
+        .section .text.17, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_str
+        .type t4_ffc_str, %function
+        .space 4096 - 4  // pad so the adrp sits at offset 0xffc of the 4KiB block
+t4_ffc_str:
+        adrp x28, dat2
+        str x2, [x2, #0]
+        sub x0, x1, x2  // optional instruction 3: does not touch x28
+        str x27, [x28, :got_lo12:dat2]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 43FF8 in unpatched output.
+        .section .text.18, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_stp
+        .type t4_ff8_stp, %function
+        .space 4096 - 8
+t4_ff8_stp:
+        adrp x16, dat3
+        stp x1,x2, [x3, #0]
+        mul x3, x16, x16  // optional instruction 3: reads x16 without writing it
+        ldr x14, [x16, :got_lo12:dat3]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 45FF8 in unpatched output.
+        .section .text.19, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_stppre
+        .type t4_ff8_stppre, %function
+        .space 4096 - 8
+t4_ff8_stppre:
+        adrp x16, dat1
+        stp x1,x2, [x3, #16]!  // pre-index writeback goes to x3, not to x16
+        mul x3, x16, x16
+        ldr x14, [x16, #8]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 47FF8 in unpatched output.
+        .section .text.20, "ax", %progbits
+        .balign 4096
+        .globl t4_ff8_stppost
+        .type t4_ff8_stppost, %function
+        .space 4096 - 8
+t4_ff8_stppost:
+        adrp x16, dat2
+        stp x1,x2, [x3], #16  // post-index writeback goes to x3, not to x16
+        mul x3, x16, x16
+        ldr x14, [x16, #8]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 49FFC in unpatched output.
+        .section .text.21, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_stpsimd
+        .type t4_ffc_stpsimd, %function
+        .space 4096 - 4  // pad so the adrp sits at offset 0xffc of the 4KiB block
+t4_ffc_stpsimd:
+        adrp x16, dat3
+        stp q1,q2, [x3, #0]  // instruction 2: store pair of vector registers
+        mul x3, x16, x16
+        ldr x14, [x16, #8]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4BFFC in unpatched output.
+        .section .text.22, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_stnp
+        .type t4_ffc_stnp, %function
+        .space 4096 - 4
+t4_ffc_stnp:
+        adrp x7, dat1
+        stnp x1,x2, [x3, #0]
+        nop  // optional instruction 3
+        ldr x10, [x7, :got_lo12:dat1]
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4DFFC in unpatched output.
+        .section .text.23, "ax", %progbits
+        .balign 4096
+        .globl t4_ffc_st1
+        .type t4_ffc_st1, %function
+        .space 4096 - 4
+t4_ffc_st1:
+        adrp x24, dat2
+        st1 { v0.16b }, [x1]
+        ldr x22, [x23, :got_lo12:dat2]  // instruction 3: a load that does not involve x24
+        str x24, [x24, #32760]  // instruction 4: store based on x24
+        ret
+
+// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 4FFF8 in unpatched output.
+        .section .text.24, "ax", %progbits
+        .balign 4096
+        .globl t3_ff8_ldr_once
+        .type t3_ff8_ldr_once, %function
+        .space 4096 - 8  // pad so the adrp sits at offset 0xff8 of the 4KiB block
+t3_ff8_ldr_once:
+        adrp x0, dat3
+        st1 { v0.16b }, [x1], x2
+        ldr x1, [x0, #16]  // both loads are based on x0; presumably the sequence
+        ldr x2, [x0, #16]  // must still be reported only once - TODO confirm
+        ret
+
+        .text
+        .globl _start
+        .type _start, %function
+_start:
+        ret
+
+        .data
+        .globl dat1
+        .globl dat2
+        .globl dat3
+dat1:   .quad 1
+dat2:   .quad 2
+dat3:   .quad 3