diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3406,6 +3406,20 @@ def mno_fix_cmse_cve_2021_35465 : Flag<["-"], "mno-fix-cmse-cve-2021-35465">, Group, HelpText<"Don't work around VLLDM erratum CVE-2021-35465 (ARM only)">; +def mfix_cortex_a57_aes_1742098 : Flag<["-"], "mfix-cortex-a57-aes-1742098">, + Group, + HelpText<"Work around Cortex-A57 Erratum 1742098 (ARM only)">; +def mno_fix_cortex_a57_aes_1742098 : Flag<["-"], "mno-fix-cortex-a57-aes-1742098">, + Group, + HelpText<"Don't work around Cortex-A57 Erratum 1742098 (ARM only)">; +def mfix_cortex_a72_aes_1655431 : Flag<["-"], "mfix-cortex-a72-aes-1655431">, + Group, + HelpText<"Work around Cortex-A72 Erratum 1655431 (ARM only)">, + Alias; +def mno_fix_cortex_a72_aes_1655431 : Flag<["-"], "mno-fix-cortex-a72-aes-1655431">, + Group, + HelpText<"Don't work around Cortex-A72 Erratum 1655431 (ARM only)">, + Alias; def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, Group, HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">; diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -733,6 +733,16 @@ Features.push_back("-fix-cmse-cve-2021-35465"); } + // This also handles the -m(no-)fix-cortex-a72-aes-1655431 arguments via aliases. + if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a57_aes_1742098, + options::OPT_mno_fix_cortex_a57_aes_1742098)) { + if (A->getOption().matches(options::OPT_mfix_cortex_a57_aes_1742098)) { + Features.push_back("+fix-cortex-a57-aes-1742098"); + } else { + Features.push_back("-fix-cortex-a57-aes-1742098"); + } + } + // Look for the last occurrence of -mlong-calls or -mno-long-calls. 
If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. diff --git a/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c b/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c @@ -0,0 +1,25 @@ +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=FIX +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=NO-FIX + +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=FIX +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=NO-FIX + +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC + +// This test checks that "-m(no-)fix-cortex-a57-aes-1742098" and +// "-m(no-)fix-cortex-a72-aes-1655431" cause the "fix-cortex-a57-aes-1742098" +// target feature to be passed to `clang -cc1`. +// +// This feature is also enabled in the backend for the two affected CPUs and the +// "generic" cpu (used when only specifying -march), but that won't show up on +// the `clang -cc1` command line. +// +// We do not check whether this option is correctly specified for the CPU: users +// can specify the "-mfix-cortex-a57-aes-1742098" option with "-mcpu=cortex-a72" +// and vice-versa, and will still get the fix, as the target feature and the fix +// is the same in both cases. 
+ +// FIX: "-target-feature" "+fix-cortex-a57-aes-1742098" +// NO-FIX: "-target-feature" "-fix-cortex-a57-aes-1742098" +// UNSPEC-NOT: "-target-feature" "{{[+-]}}fix-cortex-a57-aes-1742098" diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -57,6 +57,7 @@ FunctionPass *createARMSLSHardeningPass(); FunctionPass *createARMIndirectThunks(); Pass *createMVELaneInterleavingPass(); +FunctionPass *createARMFixCortexA57AES1742098Pass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -77,6 +78,7 @@ void initializeMVEGatherScatterLoweringPass(PassRegistry &); void initializeARMSLSHardeningPass(PassRegistry &); void initializeMVELaneInterleavingPass(PassRegistry &); +void initializeARMFixCortexA57AES1742098Pass(PassRegistry &); } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -538,6 +538,10 @@ "Don't place a BTI instruction " "after a return-twice">; +def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098", + "FixCortexA57AES1742098", "true", + "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; + //===----------------------------------------------------------------------===// // ARM architecture class // @@ -1153,7 +1157,7 @@ // ARM processors // // Dummy CPU, used to target architectures -def : ProcessorModel<"generic", CortexA8Model, []>; +def : ProcessorModel<"generic", CortexA8Model, [FeatureFixCortexA57AES1742098]>; // FIXME: Several processors below are not using their own scheduler // model, but one of similar/previous processor. These should be fixed. 
@@ -1462,13 +1466,15 @@ FeatureCRC, FeatureFPAO, FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; + FeatureCheapPredicableCPSR, + FeatureFixCortexA57AES1742098]>; def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72, FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFixCortexA57AES1742098]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureHWDivThumb, diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp @@ -0,0 +1,402 @@ +//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This pass works around a Cortex Core Fused AES erratum: +// - Cortex-A57 Erratum 1742098 +// - Cortex-A72 Erratum 1655431 +// +// The intention is this: +// - Any 128-bit or 64-bit writes to the neon input register of an AES fused +// pair are safe (the inputs are to the AESE/AESD instruction). +// - Any 32-bit writes to the input register are unsafe, but these may happen +// in another function, or only on some control flow paths. In these cases, +// conservatively insert the VORRq anyway. +// - So, analyse both inputs to the AESE/AESD instruction, inserting a VORR if +// you cannot prove they're on a list of allowed instructions. 
+//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" +#include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098" + +//===----------------------------------------------------------------------===// + +namespace { +class ARMFixCortexA57AES1742098 : public MachineFunctionPass { +public: + static char ID; + explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) { + initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "ARM fix for Cortex-A57 AES Erratum 1742098"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + // This is the 
information needed to insert the fixup in the right place. + struct AESFixupLocation { + MachineBasicBlock *Block; + // The fixup instruction will be inserted *before* InsertionPt. + MachineInstr *InsertionPt; + MachineOperand *MOp; + }; + + void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl &FixupLocsForFn) const; + + void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, + const ARMBaseRegisterInfo *TRI) const; + + static bool isFirstAESPairInstr(MachineInstr &MI); + static bool isSafeAESInput(MachineInstr &MI); +}; +char ARMFixCortexA57AES1742098::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, + false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis); +INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, false) + +//===----------------------------------------------------------------------===// + +bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + return Opc == ARM::AESD || Opc == ARM::AESE; +} + +bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) { + auto CondCodeIsAL = [&MI](unsigned CCIdx) -> bool { + assert(MI.getDesc().OpInfo[CCIdx].isPredicate() && + "CCIdx operand must be a predicate"); + return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL; + }; + + switch (MI.getOpcode()) { + // Unknown: Assume not safe. 
+ default: + return false; + // 128-bit wide AES instructions + case ARM::AESD: + case ARM::AESE: + case ARM::AESMC: + case ARM::AESIMC: + return true; + // 128-bit and 64-bit wide bitwise ops (when condition = al) + case ARM::VANDd: + case ARM::VANDq: + case ARM::VORRd: + case ARM::VORRq: + case ARM::VEORd: + case ARM::VEORq: + case ARM::VMVNd: + case ARM::VMVNq: + return CondCodeIsAL(3); + // VMOV of 64-bit value between D registers (when condition = al) + case ARM::VMOVD: + return CondCodeIsAL(2); + // VMOV of 64 bit value from GPRs (when condition = al) + case ARM::VMOVDRR: + return CondCodeIsAL(3); + // VMOV of 64-bit immediate into D or Q registers (when condition = al) + case ARM::VMOVv2i64: + case ARM::VMOVv1i64: + return CondCodeIsAL(2); + // Loads (when condition = al) + // VLD Dn, [Rn, #imm] + case ARM::VLDRD: + return CondCodeIsAL(3); + // VLDM + case ARM::VLDMDDB_UPD: + case ARM::VLDMDIA_UPD: + return CondCodeIsAL(2); + case ARM::VLDMDIA: + return CondCodeIsAL(1); + // VLDn to all lanes (to one single lane is unsafe). 
+ case ARM::VLD1d64: + case ARM::VLD1q64: + case ARM::VLD1d32: + case ARM::VLD1q32: + case ARM::VLD2b32: + case ARM::VLD2d32: + case ARM::VLD2q32: + case ARM::VLD1d16: + case ARM::VLD1q16: + case ARM::VLD2d16: + case ARM::VLD2q16: + case ARM::VLD1d8: + case ARM::VLD1q8: + case ARM::VLD2b8: + case ARM::VLD2d8: + case ARM::VLD2q8: + return CondCodeIsAL(3); + case ARM::VLD3d32: + case ARM::VLD3q32: + case ARM::VLD3d16: + case ARM::VLD3q16: + case ARM::VLD3d8: + case ARM::VLD3q8: + return CondCodeIsAL(5); + case ARM::VLD4d32: + case ARM::VLD4q32: + case ARM::VLD4d16: + case ARM::VLD4q16: + case ARM::VLD4d8: + case ARM::VLD4q8: + return CondCodeIsAL(6); + // Always Unsafe: + // VMOV of smaller immediate into D or Q + case ARM::VMOVv2f32: + case ARM::VMOVv4f32: + case ARM::VMOVv2i32: + case ARM::VMOVv4i32: + case ARM::VMOVv4i16: + case ARM::VMOVv8i16: + case ARM::VMOVv8i8: + case ARM::VMOVv16i8: + return false; + }; + + return false; +} + +bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n"); + auto &STI = F.getSubtarget(); + + // Fix not requested or AES instructions not present: skip pass. + if (!STI.hasAES() || !STI.fixCortexA57AES1742098()) + return false; + + const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo(); + const ARMBaseInstrInfo *TII = STI.getInstrInfo(); + + auto &RDA = getAnalysis(); + + // Analyze whole function to find instructions which need fixing up... + SmallVector FixupLocsForFn{}; + analyzeMF(F, RDA, TRI, FixupLocsForFn); + + // ... and fix the instructions up all at the same time. 
+ bool Changed = false; + LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n"); + for (AESFixupLocation &FixupLoc : FixupLocsForFn) { + insertAESFixup(FixupLoc, TII, TRI); + Changed |= true; + } + + return Changed; +} + +void ARMFixCortexA57AES1742098::analyzeMF( + MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl &FixupLocsForFn) const { + unsigned MaxAllowedFixups = 0; + + for (MachineBasicBlock &MBB : MF) { + // Skip this block if no instructions are the start of an AES Pair. + if (!llvm::any_of(MBB.instrs(), isFirstAESPairInstr)) + continue; + + for (MachineInstr &MI : MBB) { + if (!isFirstAESPairInstr(MI)) + continue; + + // Found an instruction to check the operands of. + LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI); + assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 && + "Unknown AES Instruction Format. Expected 1 def, 2 uses."); + + // A maximum of two fixups should be inserted for each AES pair (one per + // register use). + MaxAllowedFixups += 2; + + // Inspect all operands, choosing whether to insert a fixup. + for (MachineOperand &MOp : MI.uses()) { + SmallPtrSet AllDefs{}; + RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs); + + // Planned Fixup: This should be added to FixupLocsForFn at most once. + AESFixupLocation NewLoc{&MBB, &MI, &MOp}; + + // In small functions with loops, this operand may be both a live-in and + // have definitions within the function itself. These will need a fixup. + bool IsLiveIn = MF.front().isLiveIn(MOp.getReg()); + + // If the register doesn't have defining instructions, and is not a + // live-in, then something is wrong and the fixup must always be + // inserted to be safe. 
+ if (!IsLiveIn && AllDefs.size() == 0) { + LLVM_DEBUG(dbgs() + << "Fixup Planned: No Defining Instrs found, not live-in: " + << printReg(MOp.getReg(), TRI) << "\n"); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + auto IsUnsafe = [](MachineInstr *MI) -> bool { + return !isSafeAESInput(*MI); + }; + size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe); + + // If there are no unsafe definitions... + if (UnsafeCount == 0) { + // ... and the register is not live-in ... + if (!IsLiveIn) { + // ... then skip the fixup. + LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: " + << printReg(MOp.getReg(), TRI) << "\n"); + continue; + } + + // Otherwise, the only unsafe "definition" is a live-in, so insert the + // fixup at the start of the function. + LLVM_DEBUG(dbgs() + << "Fixup Planned: Live-In (with safe defining instrs): " + << printReg(MOp.getReg(), TRI) << "\n"); + NewLoc.Block = &MF.front(); + NewLoc.InsertionPt = &*NewLoc.Block->begin(); + LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before " + << *NewLoc.InsertionPt); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + // If a fixup is needed in more than one place, then the best place to + // insert it is adjacent to the use rather than introducing a fixup + // adjacent to each def. + // + // FIXME: It might be better to hoist this to the start of the BB, if + // possible. 
+ if (IsLiveIn || UnsafeCount > 1) { + LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs " + "(including live-ins): " + << printReg(MOp.getReg(), TRI) << "\n"); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + assert(UnsafeCount == 1 && !IsLiveIn && + "At this point, there should be one unsafe defining instrs " + "and the defined register should not be a live-in."); + SmallPtrSetIterator It = + llvm::find_if(AllDefs, IsUnsafe); + assert(It != AllDefs.end() && + "UnsafeCount == 1 but No Unsafe MachineInstr found."); + MachineInstr *DefMI = *It; + + LLVM_DEBUG( + dbgs() << "Fixup Planned: Found single unsafe defining instrs for " + << printReg(MOp.getReg(), TRI) << ": " << *DefMI); + + // There is one unsafe defining instruction, which needs a fixup. It is + // generally good to hoist the fixup to be adjacent to the defining + // instruction rather than the using instruction, as the using + // instruction may be inside a loop when the defining instruction is + // not. + MachineBasicBlock::iterator DefIt = DefMI; + ++DefIt; + if (DefIt != DefMI->getParent()->end()) { + LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI + << "And immediately before " << *DefIt); + NewLoc.Block = DefIt->getParent(); + NewLoc.InsertionPt = &*DefIt; + } + + FixupLocsForFn.emplace_back(NewLoc); + } + } + } + + assert(FixupLocsForFn.size() <= MaxAllowedFixups && + "Inserted too many fixups for this function."); +} + +void ARMFixCortexA57AES1742098::insertAESFixup( + AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, + const ARMBaseRegisterInfo *TRI) const { + MachineOperand *OperandToFixup = FixupLoc.MOp; + + assert(OperandToFixup->isReg() && "OperandToFixup must be a register"); + Register RegToFixup = OperandToFixup->getReg(); + + LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI) + << " before: " << *FixupLoc.InsertionPt); + + // Insert the new `VORRq qN, qN, qN`. 
There are a few details here: + // + // The uses are marked as killed, even if the original use of OperandToFixup + // is not killed, as the new instruction is clobbering the register. This is + // safe even if there are other uses of `qN`, as VORRq is value-wise a no-op + // (it is inserted for microarchitectural reasons). + // + // The def and the uses are still marked as Renamable if the original register + // was, to avoid having to rummage through all the other uses and defs and + // unset their renamable bits. + unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0; + BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(), + TII->get(ARM::VORRq)) + .addReg(RegToFixup, RegState::Define | Renamable) + .addReg(RegToFixup, RegState::Kill | Renamable) + .addReg(RegToFixup, RegState::Kill | Renamable) + .addImm((uint64_t)ARMCC::AL) + .addReg(ARM::NoRegister); +} + +// Factory function used by ARMTargetMachine to add the pass to +// the pass manager. +FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() { + return new ARMFixCortexA57AES1742098(); +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -107,6 +107,7 @@ initializeMVEGatherScatterLoweringPass(Registry); initializeARMSLSHardeningPass(Registry); initializeMVELaneInterleavingPass(Registry); + initializeARMFixCortexA57AES1742098Pass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -581,6 +582,8 @@ addPass(createARMConstantIslandPass()); addPass(createARMLowOverheadLoopsPass()); + addPass(createARMFixCortexA57AES1742098Pass()); + if (TM->getTargetTriple().isOSWindows()) { // Identify valid longjmp targets for Windows Control Flow Guard. 
addPass(createCFGuardLongjmpPass()); diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -32,6 +32,7 @@ ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp ARMFastISel.cpp + ARMFixCortexA57AES1742098Pass.cpp ARMFrameLowering.cpp ARMHazardRecognizer.cpp ARMInstructionSelector.cpp diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -188,6 +188,8 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass +; CHECK-NEXT: ReachingDefAnalysis +; CHECK-NEXT: ARM fix for Cortex-A57 AES Erratum 1742098 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: ARM Assembly Printer diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -24,6 +24,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0 +; CHECK-FIX-NEXT: vorr q9, q9, q9 ; CHECK-FIX-NEXT: aese.8 q9, q8 ; CHECK-FIX-NEXT: aesmc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] @@ -55,6 +56,8 @@ define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aese_once_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q1, q0 ; CHECK-FIX-NEXT: aesmc.8 q0, q1 ; CHECK-FIX-NEXT: bx lr @@ -91,6 +94,9 @@ define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aese_twice_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q0, q0, 
q0 ; CHECK-FIX-NEXT: aese.8 q1, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q1 ; CHECK-FIX-NEXT: aese.8 q8, q0 @@ -154,6 +160,8 @@ define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind { ; CHECK-FIX-LABEL: aese_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB6_2 ; CHECK-FIX-NEXT: .LBB6_1: @ =>This Inner Loop Header: Depth=1 @@ -186,6 +194,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r0] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -202,8 +211,10 @@ define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set8_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.8 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -225,6 +236,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1] ; CHECK-FIX-NEXT: .LBB9_2: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -248,12 +260,14 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB10_2 ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vmov.8 d16[0], r1 ; CHECK-FIX-NEXT: .LBB10_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; 
CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -276,6 +290,7 @@ ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB11_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -318,6 +333,7 @@ ; CHECK-FIX-NEXT: vmov.8 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB12_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -355,6 +371,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r0:16] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -373,8 +390,10 @@ define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.16 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -398,6 +417,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16] ; CHECK-FIX-NEXT: .LBB15_2: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -427,12 +447,14 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB16_2 ; CHECK-FIX-NEXT: @ %bb.1: ; 
CHECK-FIX-NEXT: vmov.16 d16[0], r1 ; CHECK-FIX-NEXT: .LBB16_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -457,6 +479,7 @@ ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB17_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -501,6 +524,7 @@ ; CHECK-FIX-NEXT: vmov.16 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB18_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -540,6 +564,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r0:32] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -558,8 +583,10 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set32_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -583,6 +610,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] ; CHECK-FIX-NEXT: .LBB21_2: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -612,12 +640,14 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: +; 
CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB22_2 ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vmov.32 d16[0], r1 ; CHECK-FIX-NEXT: .LBB22_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -642,6 +672,7 @@ ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB23_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -686,6 +717,7 @@ ; CHECK-FIX-NEXT: vmov.32 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB24_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -723,6 +755,7 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 @@ -732,6 +765,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 @@ -752,9 +786,11 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set64_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; 
CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -775,6 +811,7 @@ ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vldrne d0, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -804,6 +841,7 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [sp] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] @@ -812,6 +850,7 @@ ; CHECK-FIX-NEXT: vmov.32 d16[0], r2 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3 ; CHECK-FIX-NEXT: .LBB28_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -830,6 +869,7 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB29_1: @@ -882,6 +922,7 @@ ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d0[1], r3 ; CHECK-FIX-NEXT: .LBB30_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 @@ -921,6 +962,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0 +; CHECK-FIX-NEXT: vorr q9, q9, q9 ; CHECK-FIX-NEXT: aesd.8 q9, q8 ; CHECK-FIX-NEXT: aesimc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] @@ -952,6 +994,8 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: 
aesd_once_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q1, q0 ; CHECK-FIX-NEXT: aesimc.8 q0, q1 ; CHECK-FIX-NEXT: bx lr @@ -988,6 +1032,9 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aesd_twice_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q1, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q1 ; CHECK-FIX-NEXT: aesd.8 q8, q0 @@ -1051,6 +1098,8 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind { ; CHECK-FIX-LABEL: aesd_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB37_2 ; CHECK-FIX-NEXT: .LBB37_1: @ =>This Inner Loop Header: Depth=1 @@ -1083,6 +1132,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r0] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1099,8 +1149,10 @@ define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set8_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.8 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1122,6 +1174,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1] ; CHECK-FIX-NEXT: .LBB40_2: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1145,12 +1198,14 @@ define arm_aapcs_vfpcc void 
@aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB41_2 ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vmov.8 d16[0], r1 ; CHECK-FIX-NEXT: .LBB41_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1173,6 +1228,7 @@ ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB42_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1215,6 +1271,7 @@ ; CHECK-FIX-NEXT: vmov.8 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB43_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1252,6 +1309,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r0:16] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1270,8 +1328,10 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.16 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1295,6 +1355,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16] ; CHECK-FIX-NEXT: .LBB46_2: +; 
CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1324,12 +1385,14 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB47_2 ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vmov.16 d16[0], r1 ; CHECK-FIX-NEXT: .LBB47_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1354,6 +1417,7 @@ ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB48_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1398,6 +1462,7 @@ ; CHECK-FIX-NEXT: vmov.16 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB49_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1437,6 +1502,7 @@ ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r0:32] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1455,8 +1521,10 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set32_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; 
CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1480,6 +1548,7 @@ ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] ; CHECK-FIX-NEXT: .LBB52_2: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1509,12 +1578,14 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB53_2 ; CHECK-FIX-NEXT: @ %bb.1: ; CHECK-FIX-NEXT: vmov.32 d16[0], r1 ; CHECK-FIX-NEXT: .LBB53_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1539,6 +1610,7 @@ ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB54_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1583,6 +1655,7 @@ ; CHECK-FIX-NEXT: vmov.32 d0[0], r1 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: .LBB55_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 @@ -1620,6 +1693,7 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 @@ -1629,6 +1703,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr: ; CHECK-CORTEX-FIX: 
@ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 @@ -1649,9 +1724,11 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set64_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1 +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1672,6 +1749,7 @@ ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vldrne d0, [r1] +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1701,6 +1779,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [sp] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] @@ -1709,6 +1788,7 @@ ; CHECK-FIX-NEXT: vmov.32 d16[0], r2 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3 ; CHECK-FIX-NEXT: .LBB59_2: @ %select.end +; CHECK-FIX-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -1727,6 +1807,7 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB60_1: @@ -1779,6 +1860,7 @@ ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d0[1], r3 ; CHECK-FIX-NEXT: .LBB61_2: @ 
=>This Inner Loop Header: Depth=1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NEXT: subs r0, r0, #1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8