diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3405,6 +3405,20 @@ def mno_fix_cmse_cve_2021_35465 : Flag<["-"], "mno-fix-cmse-cve-2021-35465">, Group, HelpText<"Don't work around VLLDM erratum CVE-2021-35465 (ARM only)">; +def mfix_cortex_a57_aes_1742098 : Flag<["-"], "mfix-cortex-a57-aes-1742098">, + Group, + HelpText<"Work around Cortex-A57 Erratum 1742098 (ARM only)">; +def mno_fix_cortex_a57_aes_1742098 : Flag<["-"], "mno-fix-cortex-a57-aes-1742098">, + Group, + HelpText<"Don't work around Cortex-A57 Erratum 1742098 (ARM only)">; +def mfix_cortex_a72_aes_1655431 : Flag<["-"], "mfix-cortex-a72-aes-1655431">, + Group, + HelpText<"Work around Cortex-A72 Erratum 1655431 (ARM only)">, + Alias; +def mno_fix_cortex_a72_aes_1655431 : Flag<["-"], "mno-fix-cortex-a72-aes-1655431">, + Group, + HelpText<"Don't work around Cortex-A72 Erratum 1655431 (ARM only)">, + Alias; def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, Group, HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">; diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -733,6 +733,16 @@ Features.push_back("-fix-cmse-cve-2021-35465"); } + // This also handles the -m(no-)fix-cortex-a72-aes-1655431 arguments via aliases. + if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a57_aes_1742098, + options::OPT_mno_fix_cortex_a57_aes_1742098)) { + if (A->getOption().matches(options::OPT_mfix_cortex_a57_aes_1742098)) { + Features.push_back("+fix-cortex-a57-aes-1742098"); + } else { + Features.push_back("-fix-cortex-a57-aes-1742098"); + } + } + // Look for the last occurrence of -mlong-calls or -mno-long-calls.
If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. diff --git a/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c b/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c @@ -0,0 +1,25 @@ +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=FIX +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=NO-FIX + +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=FIX +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=NO-FIX + +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC +// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC + +// This test checks that "-m(no-)fix-cortex-a57-aes-1742098" and +// "-m(no-)fix-cortex-a72-aes-1655431" cause the "fix-cortex-a57-aes-1742098" +// target feature to be passed to `clang -cc1`. +// +// This feature is also enabled in the backend for the two affected CPUs and the +// "generic" cpu (used when only specifying -march), but that won't show up on +// the `clang -cc1` command line. +// +// We do not check whether this option is correctly specified for the CPU: users +// can specify the "-mfix-cortex-a57-aes-1742098" option with "-mcpu=cortex-a72" +// and vice-versa, and will still get the fix, as the target feature and the fix +// is the same in both cases. 
+ +// FIX: "-target-feature" "+fix-cortex-a57-aes-1742098" +// NO-FIX: "-target-feature" "-fix-cortex-a57-aes-1742098" +// UNSPEC-NOT: "-target-feature" "{{[+-]}}fix-cortex-a57-aes-1742098" diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -57,6 +57,7 @@ FunctionPass *createARMSLSHardeningPass(); FunctionPass *createARMIndirectThunks(); Pass *createMVELaneInterleavingPass(); +FunctionPass *createARMFixCortexA57AES1742098Pass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -77,6 +78,7 @@ void initializeMVEGatherScatterLoweringPass(PassRegistry &); void initializeARMSLSHardeningPass(PassRegistry &); void initializeMVELaneInterleavingPass(PassRegistry &); +void initializeARMFixCortexA57AES1742098Pass(PassRegistry &); } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -538,6 +538,10 @@ "Don't place a BTI instruction " "after a return-twice">; +def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098", + "FixCortexA57AES1742098", "true", + "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; + //===----------------------------------------------------------------------===// // ARM architecture class // @@ -1153,7 +1157,7 @@ // ARM processors // // Dummy CPU, used to target architectures -def : ProcessorModel<"generic", CortexA8Model, []>; +def : ProcessorModel<"generic", CortexA8Model, [FeatureFixCortexA57AES1742098]>; // FIXME: Several processors below are not using their own scheduler // model, but one of similar/previous processor. These should be fixed.
@@ -1462,13 +1466,15 @@ FeatureCRC, FeatureFPAO, FeatureAvoidPartialCPSR, - FeatureCheapPredicableCPSR]>; + FeatureCheapPredicableCPSR, + FeatureFixCortexA57AES1742098]>; def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72, FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFixCortexA57AES1742098]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureHWDivThumb, diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp @@ -0,0 +1,418 @@ +//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This pass works around a Cortex Core Fused AES erratum: +// - Cortex-A57 Erratum 1742098 +// - Cortex-A72 Erratum 1655431 +// +// The intention is this: +// - Any 128-bit or 64-bit writes to the neon input register of an AES fused +// pair are safe (the inputs are to the AESE/AESD instruction). +// - Any 32-bit writes to the input register are unsafe, but these may happen +// in another function, or only on some control flow paths. In these cases, +// conservatively insert the VORRq anyway. +// - So, analyse both inputs to the AESE/AESD instruction, inserting a VORR if +// you cannot prove they're on a list of allowed instructions. 
+//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" +#include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098" + +//===----------------------------------------------------------------------===// + +namespace { +class ARMFixCortexA57AES1742098 : public MachineFunctionPass { +public: + static char ID; + explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) { + initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "ARM fix for Cortex-A57 AES Erratum 1742098"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + // This is the 
information needed to insert the fixup in the right place. + struct AESFixupLocation { + MachineBasicBlock *Block; + // The fixup instruction will be inserted *before* InsertionPt. + MachineInstr *InsertionPt; + MachineOperand *MOp; + }; + + void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl &FixupLocsForFn) const; + + void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, + const ARMBaseRegisterInfo *TRI) const; + + static bool isFirstAESPairInstr(MachineInstr &MI); + static bool isSafeAESInput(MachineInstr &MI); +}; +char ARMFixCortexA57AES1742098::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, + false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis); +INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE, + "ARM fix for Cortex-A57 AES Erratum 1742098", false, false) + +//===----------------------------------------------------------------------===// + +bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + return Opc == ARM::AESD || Opc == ARM::AESE; +} + +bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) { + auto CondCodeIsAL = [](MachineInstr &MI) -> bool { + int CCIdx = MI.findFirstPredOperandIdx(); + if (CCIdx == -1) + return false; + return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL; + }; + + switch (MI.getOpcode()) { + // Unknown: Assume not safe. + default: + return false; + // 128-bit wide AES instructions + case ARM::AESD: + case ARM::AESE: + case ARM::AESMC: + case ARM::AESIMC: + // No CondCode. 
+ return true; + // 128-bit and 64-bit wide bitwise ops (when condition = al) + case ARM::VANDd: + case ARM::VANDq: + case ARM::VORRd: + case ARM::VORRq: + case ARM::VEORd: + case ARM::VEORq: + case ARM::VMVNd: + case ARM::VMVNq: + // VMOV of 64-bit value between D registers (when condition = al) + case ARM::VMOVD: + // VMOV of 64 bit value from GPRs (when condition = al) + case ARM::VMOVDRR: + // VMOV of immediate into D or Q registers (when condition = al) + case ARM::VMOVv2i64: + case ARM::VMOVv1i64: + case ARM::VMOVv2f32: + case ARM::VMOVv4f32: + case ARM::VMOVv2i32: + case ARM::VMOVv4i32: + case ARM::VMOVv4i16: + case ARM::VMOVv8i16: + case ARM::VMOVv8i8: + case ARM::VMOVv16i8: + // Loads (when condition = al) + // VLD Dn, [Rn, #imm] + case ARM::VLDRD: + // VLDM + case ARM::VLDMDDB_UPD: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDIA: + // VLDn to all lanes. + case ARM::VLD1d64: + case ARM::VLD1q64: + case ARM::VLD1d32: + case ARM::VLD1q32: + case ARM::VLD2b32: + case ARM::VLD2d32: + case ARM::VLD2q32: + case ARM::VLD1d16: + case ARM::VLD1q16: + case ARM::VLD2d16: + case ARM::VLD2q16: + case ARM::VLD1d8: + case ARM::VLD1q8: + case ARM::VLD2b8: + case ARM::VLD2d8: + case ARM::VLD2q8: + case ARM::VLD3d32: + case ARM::VLD3q32: + case ARM::VLD3d16: + case ARM::VLD3q16: + case ARM::VLD3d8: + case ARM::VLD3q8: + case ARM::VLD4d32: + case ARM::VLD4q32: + case ARM::VLD4d16: + case ARM::VLD4q16: + case ARM::VLD4d8: + case ARM::VLD4q8: + // VLD1 (single element to one lane) + case ARM::VLD1LNd32: + case ARM::VLD1LNd32_UPD: + case ARM::VLD1LNd8: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16: + case ARM::VLD1LNd16_UPD: + // VLD1 (single element to all lanes) + case ARM::VLD1DUPd32: + case ARM::VLD1DUPd32wb_fixed: + case ARM::VLD1DUPd32wb_register: + case ARM::VLD1DUPd16: + case ARM::VLD1DUPd16wb_fixed: + case ARM::VLD1DUPd16wb_register: + case ARM::VLD1DUPd8: + case ARM::VLD1DUPd8wb_fixed: + case ARM::VLD1DUPd8wb_register: + case ARM::VLD1DUPq32: + case 
ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq32wb_register: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq8wb_register: + // VMOV + case ARM::VSETLNi32: + case ARM::VSETLNi16: + case ARM::VSETLNi8: + return CondCodeIsAL(MI); + }; + + return false; +} + +bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n"); + auto &STI = F.getSubtarget(); + + // Fix not requested or AES instructions not present: skip pass. + if (!STI.hasAES() || !STI.fixCortexA57AES1742098()) + return false; + + const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo(); + const ARMBaseInstrInfo *TII = STI.getInstrInfo(); + + auto &RDA = getAnalysis(); + + // Analyze whole function to find instructions which need fixing up... + SmallVector FixupLocsForFn{}; + analyzeMF(F, RDA, TRI, FixupLocsForFn); + + // ... and fix the instructions up all at the same time. + bool Changed = false; + LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n"); + for (AESFixupLocation &FixupLoc : FixupLocsForFn) { + insertAESFixup(FixupLoc, TII, TRI); + Changed |= true; + } + + return Changed; +} + +void ARMFixCortexA57AES1742098::analyzeMF( + MachineFunction &MF, ReachingDefAnalysis &RDA, + const ARMBaseRegisterInfo *TRI, + SmallVectorImpl &FixupLocsForFn) const { + unsigned MaxAllowedFixups = 0; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isFirstAESPairInstr(MI)) + continue; + + // Found an instruction to check the operands of. + LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI); + assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 && + "Unknown AES Instruction Format. Expected 1 def, 2 uses."); + + // A maximum of two fixups should be inserted for each AES pair (one per + // register use). 
+ MaxAllowedFixups += 2; + + // Inspect all operands, choosing whether to insert a fixup. + for (MachineOperand &MOp : MI.uses()) { + SmallPtrSet AllDefs{}; + RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs); + + // Planned Fixup: This should be added to FixupLocsForFn at most once. + AESFixupLocation NewLoc{&MBB, &MI, &MOp}; + + // In small functions with loops, this operand may be both a live-in and + // have definitions within the function itself. These will need a fixup. + bool IsLiveIn = MF.front().isLiveIn(MOp.getReg()); + + // If the register doesn't have defining instructions, and is not a + // live-in, then something is wrong and the fixup must always be + // inserted to be safe. + if (!IsLiveIn && AllDefs.size() == 0) { + LLVM_DEBUG(dbgs() + << "Fixup Planned: No Defining Instrs found, not live-in: " + << printReg(MOp.getReg(), TRI) << "\n"); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + auto IsUnsafe = [](MachineInstr *MI) -> bool { + return !isSafeAESInput(*MI); + }; + size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe); + + // If there are no unsafe definitions... + if (UnsafeCount == 0) { + // ... and the register is not live-in ... + if (!IsLiveIn) { + // ... then skip the fixup. + LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: " + << printReg(MOp.getReg(), TRI) << "\n"); + continue; + } + + // Otherwise, the only unsafe "definition" is a live-in, so insert the + // fixup at the start of the function.
+ LLVM_DEBUG(dbgs() + << "Fixup Planned: Live-In (with safe defining instrs): " + << printReg(MOp.getReg(), TRI) << "\n"); + NewLoc.Block = &MF.front(); + NewLoc.InsertionPt = &*NewLoc.Block->begin(); + LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before " + << *NewLoc.InsertionPt); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + // If a fixup is needed in more than one place, then the best place to + // insert it is adjacent to the use rather than introducing a fixup + // adjacent to each def. + // + // FIXME: It might be better to hoist this to the start of the BB, if + // possible. + if (IsLiveIn || UnsafeCount > 1) { + LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs " + "(including live-ins): " + << printReg(MOp.getReg(), TRI) << "\n"); + FixupLocsForFn.emplace_back(NewLoc); + continue; + } + + assert(UnsafeCount == 1 && !IsLiveIn && + "At this point, there should be one unsafe defining instrs " + "and the defined register should not be a live-in."); + SmallPtrSetIterator It = + llvm::find_if(AllDefs, IsUnsafe); + assert(It != AllDefs.end() && + "UnsafeCount == 1 but No Unsafe MachineInstr found."); + MachineInstr *DefMI = *It; + + LLVM_DEBUG( + dbgs() << "Fixup Planned: Found single unsafe defining instrs for " + << printReg(MOp.getReg(), TRI) << ": " << *DefMI); + + // There is one unsafe defining instruction, which needs a fixup. It is + // generally good to hoist the fixup to be adjacent to the defining + // instruction rather than the using instruction, as the using + // instruction may be inside a loop when the defining instruction is + // not. 
+ MachineBasicBlock::iterator DefIt = DefMI; + ++DefIt; + if (DefIt != DefMI->getParent()->end()) { + LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI + << "And immediately before " << *DefIt); + NewLoc.Block = DefIt->getParent(); + NewLoc.InsertionPt = &*DefIt; + } + + FixupLocsForFn.emplace_back(NewLoc); + } + } + } + + assert(FixupLocsForFn.size() <= MaxAllowedFixups && + "Inserted too many fixups for this function."); +} + +void ARMFixCortexA57AES1742098::insertAESFixup( + AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, + const ARMBaseRegisterInfo *TRI) const { + MachineOperand *OperandToFixup = FixupLoc.MOp; + + assert(OperandToFixup->isReg() && "OperandToFixup must be a register"); + Register RegToFixup = OperandToFixup->getReg(); + + LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI) + << " before: " << *FixupLoc.InsertionPt); + + // Insert the new `VORRq qN, qN, qN`. There are a few details here: + // + // The uses are marked as killed, even if the original use of OperandToFixup + // is not killed, as the new instruction is clobbering the register. This is + // safe even if there are other uses of `qN`, as the VORRq is value-wise a no-op + // (it is inserted for microarchitectural reasons). + // + // The def and the uses are still marked as Renamable if the original register + // was, to avoid having to rummage through all the other uses and defs and + // unset their renamable bits. + unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0; + BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(), + TII->get(ARM::VORRq)) + .addReg(RegToFixup, RegState::Define | Renamable) + .addReg(RegToFixup, RegState::Kill | Renamable) + .addReg(RegToFixup, RegState::Kill | Renamable) + .addImm((uint64_t)ARMCC::AL) + .addReg(ARM::NoRegister); +} + +// Factory function used by ARMTargetMachine to add the pass to +// the passmanager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() { + return new ARMFixCortexA57AES1742098(); +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -107,6 +107,7 @@ initializeMVEGatherScatterLoweringPass(Registry); initializeARMSLSHardeningPass(Registry); initializeMVELaneInterleavingPass(Registry); + initializeARMFixCortexA57AES1742098Pass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -577,8 +578,19 @@ } void ARMPassConfig::addPreEmitPass2() { + // Inserts fixup instructions before unsafe AES operations. Instructions may + // be inserted at the start of blocks and within blocks so this pass has to + // come before those below. + addPass(createARMFixCortexA57AES1742098Pass()); + // Inserts BTIs at the start of functions and indirectly-called basic blocks, + // so passes cannot add to the start of basic blocks once this has run. addPass(createARMBranchTargetsPass()); + // Inserts Constant Islands. No new instructions may be inserted after this + // point, as this will affect the offsets used for accessing these constants. addPass(createARMConstantIslandPass()); + // Finalises Low-Overhead Loops. This relies on knowing the final block size, + // but can run after constant islands as it does not insert additional + // instructions.
addPass(createARMLowOverheadLoopsPass()); if (TM->getTargetTriple().isOSWindows()) { diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -32,6 +32,7 @@ ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp ARMFastISel.cpp + ARMFixCortexA57AES1742098Pass.cpp ARMFrameLowering.cpp ARMHazardRecognizer.cpp ARMInstructionSelector.cpp diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -181,6 +181,8 @@ ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: ReachingDefAnalysis +; CHECK-NEXT: ARM fix for Cortex-A57 AES Erratum 1742098 ; CHECK-NEXT: ARM Branch Targets ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: ARM constant island placement and branch shortening pass diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -47,6 +47,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_input +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: aese.8 q0, q8 ; CHECK-FIX-NEXT: aesmc.8 q8, q0 @@ -67,6 +68,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_inputf16 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: aese.8 q0, q8 ; CHECK-FIX-NEXT: aesmc.8 q8, q0 @@ -87,6 +89,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_inputf32 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: aese.8 q0, q8 ; CHECK-FIX-NEXT: aesmc.8 q8, q0 @@ -120,6 +123,8 @@ define arm_aapcs_vfpcc <16 x i8> 
@aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aese_once_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q1, q0 ; CHECK-FIX-NEXT: aesmc.8 q0, q1 ; CHECK-FIX-NEXT: bx lr @@ -156,6 +161,9 @@ define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aese_twice_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q1, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q1 ; CHECK-FIX-NEXT: aese.8 q8, q0 @@ -219,6 +227,8 @@ define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind { ; CHECK-FIX-LABEL: aese_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB9_2 ; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1 @@ -249,6 +259,7 @@ define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0 @@ -260,6 +271,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0 @@ -281,6 +293,7 @@ define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set8_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.8 d0[0], r0 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0 @@ 
-300,6 +313,7 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB12_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -351,6 +365,7 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB13_2 @@ -380,6 +395,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrb r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strb r1, [r2] @@ -426,6 +442,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB15_1: @@ -469,6 +486,7 @@ define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 @@ -480,6 +498,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 @@ -505,6 +524,7 @@ define arm_aapcs_vfpcc void 
@aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.16 d0[0], r0 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0 @@ -528,6 +548,7 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB18_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -588,6 +609,7 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB19_2 @@ -621,6 +643,7 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrh r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strh r1, [r2] @@ -669,6 +692,7 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB21_1: @@ -714,6 +738,7 @@ define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0 @@ -725,6 +750,7 @@ ; 
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0 @@ -750,6 +776,7 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set32_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d0[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 @@ -773,6 +800,7 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB24_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -833,6 +861,7 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB25_2 @@ -866,6 +895,7 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: str r1, [r2] @@ -914,6 +944,7 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB27_1: @@ -959,6 +990,7 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; 
CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0 @@ -969,6 +1001,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0 @@ -993,6 +1026,7 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_set64_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vmov.32 d0[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 @@ -1029,6 +1063,7 @@ ; CHECK-FIX-NOSCHED-NEXT: .LBB30_3: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1] +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] @@ -1048,6 +1083,7 @@ ; CHECK-CORTEX-FIX-NEXT: .LBB30_3: ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1] +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -1093,6 +1129,7 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [sp] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] @@ -1129,6 +1166,7 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr: ; 
CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1] @@ -1150,6 +1188,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr} ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr} ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1] @@ -1200,6 +1239,7 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB33_1: @@ -1248,6 +1288,7 @@ define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 @@ -1259,6 +1300,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 @@ -1285,6 +1327,7 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aese_setf16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: vmov r1, s0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: vmov.16 d2[0], r1 @@ -1964,6 +2007,7 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; 
CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrh r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strh r1, [r2] @@ -2013,6 +2057,7 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_setf16_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB39_1: @@ -2066,6 +2111,8 @@ ; CHECK-FIX-NEXT: vldr s0, [r0] ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1] ; CHECK-FIX-NEXT: vmov.f32 s4, s0 +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aese.8 q1, q0 ; CHECK-FIX-NEXT: aesmc.8 q8, q1 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -2090,6 +2137,8 @@ ; CHECK-FIX-NEXT: vmov.f32 s4, s0 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0] ; CHECK-FIX-NEXT: vmov.f32 s0, s4 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: aese.8 q0, q1 ; CHECK-FIX-NEXT: aesmc.8 q8, q0 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] @@ -2110,6 +2159,7 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB42_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -2173,8 +2223,10 @@ ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] @@ -2185,8 +2237,10 @@ ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0 +; 
CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0 +; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -2217,6 +2271,7 @@ ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 @@ -2235,6 +2290,7 @@ ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4 ; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 @@ -2283,6 +2339,8 @@ ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 +; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] @@ -2299,7 +2357,9 @@ ; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0 +; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 +; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -2357,6 +2417,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_input +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; 
CHECK-FIX-NEXT: aesd.8 q0, q8 ; CHECK-FIX-NEXT: aesimc.8 q8, q0 @@ -2377,6 +2438,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_inputf16 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: aesd.8 q0, q8 ; CHECK-FIX-NEXT: aesimc.8 q8, q0 @@ -2397,6 +2459,7 @@ ; CHECK-FIX-NEXT: push {r4, lr} ; CHECK-FIX-NEXT: mov r4, r0 ; CHECK-FIX-NEXT: bl get_inputf32 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: aesd.8 q0, q8 ; CHECK-FIX-NEXT: aesimc.8 q8, q0 @@ -2430,6 +2493,8 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aesd_once_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q1, q0 ; CHECK-FIX-NEXT: aesimc.8 q0, q1 ; CHECK-FIX-NEXT: bx lr @@ -2466,6 +2531,9 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind { ; CHECK-FIX-LABEL: aesd_twice_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q1, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q1 ; CHECK-FIX-NEXT: aesd.8 q8, q0 @@ -2529,6 +2597,8 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind { ; CHECK-FIX-LABEL: aesd_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB55_2 ; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1 @@ -2559,6 +2629,7 @@ define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0 @@ -2570,6 +2641,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0 @@ -2591,6 +2663,7 @@ define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set8_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.8 d0[0], r0 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0 @@ -2610,6 +2683,7 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB58_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -2661,6 +2735,7 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB59_2 @@ -2690,6 +2765,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrb r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strb r1, [r2] @@ -2736,6 +2812,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB61_1: @@ -2779,6 +2856,7 @@ define 
arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 @@ -2790,6 +2868,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 @@ -2815,6 +2894,7 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.16 d0[0], r0 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0 @@ -2838,6 +2918,7 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB64_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -2898,6 +2979,7 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB65_2 @@ -2931,6 +3013,7 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrh r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strh r1, [r2] @@ -2979,6 +3062,7 @@ define 
arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB67_1: @@ -3024,6 +3108,7 @@ define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0 @@ -3035,6 +3120,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0 @@ -3060,6 +3146,7 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set32_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: vmov.32 d0[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 @@ -3083,6 +3170,7 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB70_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -3143,6 +3231,7 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB71_2 @@ -3176,6 +3265,7 @@ define arm_aapcs_vfpcc void 
@aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: str r1, [r2] @@ -3224,6 +3314,7 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB73_1: @@ -3269,6 +3360,7 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0 @@ -3279,6 +3371,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0 @@ -3303,6 +3396,7 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_set64_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vmov.32 d0[0], r0 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0 @@ -3339,6 +3433,7 @@ ; CHECK-FIX-NOSCHED-NEXT: .LBB76_3: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1] +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] @@ -3358,6 +3453,7 @@ ; CHECK-CORTEX-FIX-NEXT: .LBB76_3: ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: 
vldrne d0, [r1] +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] @@ -3403,6 +3499,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldr r1, [sp] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] @@ -3439,6 +3536,7 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr} ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1] @@ -3460,6 +3558,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr} ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr} ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1] @@ -3510,6 +3609,7 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB79_1: @@ -3558,6 +3658,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 @@ -3569,6 +3670,7 @@ ; ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr: ; CHECK-CORTEX-FIX: 
@ %bb.0: +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 @@ -3595,6 +3697,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: vmov r1, s0 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: vmov.16 d2[0], r1 @@ -4274,6 +4377,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: ldrh r1, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: strh r1, [r2] @@ -4323,6 +4427,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_loop_via_val: ; CHECK-FIX: @ %bb.0: +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bxeq lr ; CHECK-FIX-NEXT: .LBB85_1: @@ -4376,6 +4481,8 @@ ; CHECK-FIX-NEXT: vldr s0, [r0] ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1] ; CHECK-FIX-NEXT: vmov.f32 s4, s0 +; CHECK-FIX-NEXT: vorr q1, q1, q1 +; CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: aesd.8 q1, q0 ; CHECK-FIX-NEXT: aesimc.8 q8, q1 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -4400,6 +4507,8 @@ ; CHECK-FIX-NEXT: vmov.f32 s4, s0 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0] ; CHECK-FIX-NEXT: vmov.f32 s0, s4 +; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NEXT: aesd.8 q0, q1 ; CHECK-FIX-NEXT: aesimc.8 q8, q0 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] @@ -4420,6 +4529,7 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: +; 
CHECK-FIX-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB88_2 ; CHECK-FIX-NEXT: @ %bb.1: @@ -4483,8 +4593,10 @@ ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] @@ -4495,8 +4607,10 @@ ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0 +; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0 +; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] @@ -4527,6 +4641,7 @@ ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 @@ -4545,6 +4660,7 @@ ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4 ; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1 +; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 @@ -4593,6 +4709,8 @@ ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0 +; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 +; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1 ; 
CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] @@ -4609,7 +4727,9 @@ ; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0 +; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 +; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]