diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -47,6 +47,7 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createMVEVPTBlockPass(); +FunctionPass *createMVEVPTOptimisationsPass(); FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass( std::function Ftor = nullptr); @@ -66,6 +67,7 @@ void initializeThumb2SizeReducePass(PassRegistry &); void initializeThumb2ITBlockPass(PassRegistry &); void initializeMVEVPTBlockPass(PassRegistry &); +void initializeMVEVPTOptimisationsPass(PassRegistry &); void initializeARMLowOverheadLoopsPass(PassRegistry &); void initializeMVETailPredicationPass(PassRegistry &); void initializeMVEGatherScatterLoweringPass(PassRegistry &); diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -96,6 +96,7 @@ initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); initializeMVEVPTBlockPass(Registry); + initializeMVEVPTOptimisationsPass(Registry); initializeMVETailPredicationPass(Registry); initializeARMLowOverheadLoopsPass(Registry); initializeMVEGatherScatterLoweringPass(Registry); @@ -487,6 +488,8 @@ void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + addPass(createMVEVPTOptimisationsPass()); + addPass(createMLxExpansionPass()); if (EnableARMLoadStoreOpt) diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -54,6 +54,7 @@ MVEGatherScatterLowering.cpp MVETailPredication.cpp MVEVPTBlockPass.cpp + MVEVPTOptimisationsPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp ThumbRegisterInfo.cpp diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp 
b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -0,0 +1,248 @@
+//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass does a few optimisations related to MVE VPT blocks before
+/// register allocation is performed. The goal is to maximize the sizes of the
+/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
+/// after register allocation). Currently, this pass replaces VCMPs with VPNOTs
+/// when possible, so the Block Insertion pass can delete them later to create
+/// larger VPT blocks.
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-mve-vpt-opts"
+
+namespace {
+/// Machine-function pass that rewrites a VCMP into a VPNOT of the preceding
+/// VCMP's result when the two compares are exact opposites of each other.
+class MVEVPTOptimisations : public MachineFunctionPass {
+public:
+  static char ID;
+  // Both are assigned in runOnMachineFunction before any block is processed.
+  const Thumb2InstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+  MVEVPTOptimisations() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override {
+    return "ARM MVE VPT Optimisation Pass";
+  }
+
+private:
+  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
+};
+
+char MVEVPTOptimisations::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
+                "ARM MVE VPT Optimisations pass", false, false)
+
+// Returns true if Opcode is any VCMP Opcode.
+static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
+
+// Returns true if a VCMP with this Opcode can have its operands swapped.
+// There are two kinds of VCMP that can't have their operands swapped: float
+// VCMPs, and VCMPr instructions (since the r is always on the right).
+static bool CanHaveSwappedOperands(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return true;
+  case ARM::MVE_VCMPf32:
+  case ARM::MVE_VCMPf16:
+  case ARM::MVE_VCMPf32r:
+  case ARM::MVE_VCMPf16r:
+  case ARM::MVE_VCMPi8r:
+  case ARM::MVE_VCMPi16r:
+  case ARM::MVE_VCMPi32r:
+  case ARM::MVE_VCMPu8r:
+  case ARM::MVE_VCMPu16r:
+  case ARM::MVE_VCMPu32r:
+  case ARM::MVE_VCMPs8r:
+  case ARM::MVE_VCMPs16r:
+  case ARM::MVE_VCMPs32r:
+    return false;
+  }
+}
+
+// Returns the CondCode of a VCMP Instruction (its immediate operand 3).
+static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
+  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
+  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
+}
+
+// Returns true if Cond is equivalent to a VPNOT instruction on the result of
+// Prev. Cond and Prev must be VCMPs.
+static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
+  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
+
+  // Opcodes must match.
+  if (Cond.getOpcode() != Prev.getOpcode())
+    return false;
+
+  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
+  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
+
+  // If the VCMP has the opposite condition with the same operands, we can
+  // replace it with a VPNOT
+  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
+  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
+  if (ExpectedCode == GetCondCode(Prev))
+    if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
+      return true;
+  // Check again with operands swapped if possible
+  if (!CanHaveSwappedOperands(Cond.getOpcode()))
+    return false;
+  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
+  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
+         CondOP2.isIdenticalTo(PrevOP1);
+}
+
+// Returns true if Instr writes to VCCR, i.e. its first operand is a virtual
+// register whose register class is VCCR.
+static bool IsWritingToVCCR(MachineInstr &Instr) {
+  if (Instr.getNumOperands() == 0)
+    return false;
+  MachineOperand &Dst = Instr.getOperand(0);
+  if (!Dst.isReg())
+    return false;
+  Register DstReg = Dst.getReg();
+  if (!DstReg.isVirtual())
+    return false;
+  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
+  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
+  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
+}
+
+// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
+// Returns true if at least one VCMP of MBB was replaced.
+bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
+  SmallVector<MachineInstr *, 4> DeadInstructions;
+
+  // The last VCMP that we have seen and that couldn't be replaced.
+  // This is reset when an instruction that writes to VCCR/VPR is found, or when
+  // a VCMP is replaced with a VPNOT.
+  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
+  // current VCMP is the opposite of PrevVCMP.
+  MachineInstr *PrevVCMP = nullptr;
+  // If we find an instruction that kills the result of PrevVCMP, we save the
+  // operand here to remove the kill flag in case we need to use PrevVCMP's
+  // result. It always describes PrevVCMP's result, so it must be cleared
+  // whenever PrevVCMP changes.
+  MachineOperand *PrevVCMPResultKiller = nullptr;
+
+  for (MachineInstr &Instr : MBB.instrs()) {
+    if (PrevVCMP) {
+      if (MachineOperand *MO = Instr.findRegisterUseOperand(
+              PrevVCMP->getOperand(0).getReg(), /*isKill=*/true)) {
+        // If we come across the instr that kills PrevVCMP's result, record it
+        // so we can remove the kill flag later if we need to.
+        PrevVCMPResultKiller = MO;
+      }
+    }
+
+    // Ignore predicated instructions.
+    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
+      continue;
+
+    // Only look at VCMPs
+    if (!IsVCMP(Instr.getOpcode())) {
+      // If the instruction writes to VCCR, forget the previous VCMP (and the
+      // kill recorded for its result).
+      if (IsWritingToVCCR(Instr)) {
+        PrevVCMP = nullptr;
+        PrevVCMPResultKiller = nullptr;
+      }
+      continue;
+    }
+
+    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
+      // Start tracking this VCMP instead. Any kill recorded so far belongs to
+      // the old PrevVCMP's result and must not be carried over.
+      PrevVCMP = &Instr;
+      PrevVCMPResultKiller = nullptr;
+      continue;
+    }
+
+    // The register holding PrevVCMP's result, which the new VPNOT negates.
+    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
+
+    // Build a VPNOT to replace the VCMP, reusing its operands.
+    MachineInstrBuilder MIBuilder =
+        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+            .add(Instr.getOperand(0))
+            .addReg(PrevVCMPResultReg);
+    addUnpredicatedMveVpredNOp(MIBuilder);
+    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
+               MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
+               Instr.dump());
+
+    // If we found an instruction that uses, and kills PrevVCMP's result,
+    // remove the kill flag.
+    if (PrevVCMPResultKiller)
+      PrevVCMPResultKiller->setIsKill(false);
+
+    // Finally, mark the old VCMP for removal and reset
+    // PrevVCMP/PrevVCMPResultKiller.
+    DeadInstructions.push_back(&Instr);
+    PrevVCMP = nullptr;
+    PrevVCMPResultKiller = nullptr;
+  }
+
+  // Deletion is deferred until here so that the loop above never erases the
+  // instruction it is currently visiting. eraseFromParent() both unlinks and
+  // deletes the instruction; removeFromParent() would only unlink it.
+  for (MachineInstr *DeadInstruction : DeadInstructions)
+    DeadInstruction->eraseFromParent();
+
+  return !DeadInstructions.empty();
+}
+
+bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
+  const ARMSubtarget &STI =
+      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+  // Nothing to do unless the subtarget is Thumb2 with MVE integer ops.
+  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+    return false;
+
+  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+  MRI = &Fn.getRegInfo();
+
+  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
+                    << "********** Function: " << Fn.getName() << '\n');
+
+  bool Modified = false;
+  for (MachineBasicBlock &MBB : Fn)
+    Modified |= ReplaceVCMPsByVPNOTs(MBB);
+
+  LLVM_DEBUG(dbgs() << "**************************************\n");
+  return Modified;
+}
+
+/// createMVEVPTOptimisationsPass
+/// Returns a newly allocated instance of the MVE VPT optimisations pass.
+FunctionPass *llvm::createMVEVPTOptimisationsPass() {
+  return new MVEVPTOptimisations();
+}
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -92,6 +92,7 @@
 ; CHECK-NEXT: Machine code sinking
 ; CHECK-NEXT: Peephole Optimizations
 ; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: MVE VPT Optimisation Pass
 ; CHECK-NEXT: ARM MLA / MLS expansion pass
 ; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
 ; CHECK-NEXT: ARM A15 S->D optimizer
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll b/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-blocks.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
+
+declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
+
+define
arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpt_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpt.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptt_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vptt.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q3, q1, q2 +; CHECK-NEXT: vorrt q0, q3, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpttt_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpttt.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1) + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2) + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptttt_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vptttt.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; 
CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1) + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3) + ret <4 x i32> %4 +} + + +define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpte_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpte.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptte_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vptte.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorre q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = xor <4 x i1> %0, + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3) + ret <4 x i32> %4 +} + +define 
arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptee_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vptee.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vorre q0, q1, q2 +; CHECK-NEXT: vorre q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = xor <4 x i1> %0, + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3) + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptet_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3) + ret <4 x i32> %4 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpttet_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: 
vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpstt +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptett_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpstt +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x 
i1> %0, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpteet_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpteee_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpteee.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 
x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vptete_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpttte_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpttte.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 
+; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4) + ret <4 x i32> %5 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: vpttee_block: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpttee.s32 ge, q0, q2 +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: vmove q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %a, %c + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a) + %2 = xor <4 x i1> %0, + %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1) + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3) + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4) + ret <4 x i32> %5 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir @@ -0,0 +1,547 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt-opts %s -o - | FileCheck %s + +--- | + target datalayout = 
"e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + ; Functions are intentionally left blank - see the MIR sequences below. + + define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @vcmp_with_opposite_cond_and_swapped_operands(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @triple_vcmp(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @killed_vccr_values(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @predicated_vcmps(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @flt_with_swapped_operands(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @different_opcodes(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @incorrect_condcode(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + define arm_aapcs_vfpcc <4 x float> @vpr_or_vccr_write_between_vcmps(<4 x float> %inactive1) #0 { + entry: + ret <4 x float> %inactive1 + } + + attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" } +... 
+--- +name: vcmp_with_opposite_cond +alignment: 4 +body: | + ; CHECK-LABEL: name: vcmp_with_opposite_cond + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32_]], 0, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg + ; CHECK: bb.5: + ; CHECK: successors: %bb.6(0x80000000) + ; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg + ; CHECK: bb.6: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg + ; CHECK: bb.7: + ; CHECK: successors: %bb.8(0x80000000) + ; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg + ; CHECK: bb.8: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = 
MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg + ; CHECK: bb.9: + ; CHECK: successors: %bb.10(0x80000000) + ; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT9:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg + ; CHECK: bb.10: + ; CHECK: successors: %bb.11(0x80000000) + ; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT10:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg + ; CHECK: bb.11: + ; CHECK: successors: %bb.12(0x80000000) + ; CHECK: [[MVE_VCMPf16r:%[0-9]+]]:vccr = MVE_VCMPf16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT11:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16r]], 0, $noreg + ; CHECK: bb.12: + ; CHECK: successors: %bb.13(0x80000000) + ; CHECK: [[MVE_VCMPf32r:%[0-9]+]]:vccr = MVE_VCMPf32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT12:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf32r]], 0, $noreg + ; CHECK: bb.13: + ; CHECK: successors: %bb.14(0x80000000) + ; CHECK: [[MVE_VCMPi16r:%[0-9]+]]:vccr = MVE_VCMPi16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT13:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16r]], 0, $noreg + ; CHECK: bb.14: + ; CHECK: successors: %bb.15(0x80000000) + ; CHECK: [[MVE_VCMPi32r:%[0-9]+]]:vccr = MVE_VCMPi32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT14:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32r]], 0, $noreg + ; CHECK: bb.15: + ; CHECK: successors: %bb.16(0x80000000) + ; CHECK: [[MVE_VCMPi8r:%[0-9]+]]:vccr = MVE_VCMPi8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT15:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8r]], 0, $noreg + ; CHECK: bb.16: + ; CHECK: successors: %bb.17(0x80000000) + ; CHECK: [[MVE_VCMPs16r:%[0-9]+]]:vccr = MVE_VCMPs16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT16:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16r]], 0, $noreg + ; CHECK: bb.17: + ; 
CHECK: successors: %bb.18(0x80000000) + ; CHECK: [[MVE_VCMPs32r:%[0-9]+]]:vccr = MVE_VCMPs32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT17:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32r]], 0, $noreg + ; CHECK: bb.18: + ; CHECK: successors: %bb.19(0x80000000) + ; CHECK: [[MVE_VCMPs8r:%[0-9]+]]:vccr = MVE_VCMPs8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT18:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8r]], 0, $noreg + ; CHECK: bb.19: + ; CHECK: successors: %bb.20(0x80000000) + ; CHECK: [[MVE_VCMPu16r:%[0-9]+]]:vccr = MVE_VCMPu16r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT19:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16r]], 0, $noreg + ; CHECK: bb.20: + ; CHECK: successors: %bb.21(0x80000000) + ; CHECK: [[MVE_VCMPu32r:%[0-9]+]]:vccr = MVE_VCMPu32r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT20:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32r]], 0, $noreg + ; CHECK: bb.21: + ; CHECK: successors: %bb.22(0x80000000) + ; CHECK: [[MVE_VCMPu8r:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, %25:gprwithzr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT21:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r]], 0, $noreg + ; CHECK: bb.22: + ; CHECK: [[MVE_VCMPu8r1:%[0-9]+]]:vccr = MVE_VCMPu8r %1:mqpr, $zr, 10, 0, $noreg + ; CHECK: [[MVE_VPNOT22:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8r1]], 0, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr + ; + ; Tests that VCMPs with an opposite condition are correctly converted into VPNOTs. 
+ ; + bb.0: + %3:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg + %4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.1: + %5:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg + %6:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.2: + %7:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg + %8:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.3: + %9:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg + %10:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.4: + %11:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg + %12:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.5: + %13:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg + %14:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.6: + %15:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg + %16:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.7: + %17:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg + %18:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.8: + %19:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg + %20:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.9: + %21:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg + %22:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.10: + %23:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg + %24:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 11, 0, $noreg + + bb.11: + %25:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %26:vccr = MVE_VCMPf16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.12: + %27:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %28:vccr = MVE_VCMPf32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.13: + %29:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %30:vccr = MVE_VCMPi16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.14: + %31:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %32:vccr = MVE_VCMPi32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.15: + %33:vccr = MVE_VCMPi8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %34:vccr = MVE_VCMPi8r 
%0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.16: + %35:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %36:vccr = MVE_VCMPs16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.17: + %37:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %38:vccr = MVE_VCMPs32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.18: + %39:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %40:vccr = MVE_VCMPs8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.19: + %41:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %42:vccr = MVE_VCMPu16r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.20: + %43:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %44:vccr = MVE_VCMPu32r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.21: + %45:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 10, 0, $noreg + %46:vccr = MVE_VCMPu8r %0:mqpr, %2:gprwithzr, 11, 0, $noreg + + bb.22: + ; There shouldn't be any exception for $zr, so the second VCMP should + ; be transformed into a VPNOT. + %47:vccr = MVE_VCMPu8r %0:mqpr, $zr, 10, 0, $noreg + %48:vccr = MVE_VCMPu8r %0:mqpr, $zr, 11, 0, $noreg + + tBX_RET 14, $noreg, implicit %0:mqpr +... 
+---
+name: vcmp_with_opposite_cond_and_swapped_operands
+alignment: 4
+body: |
+  ; CHECK-LABEL: name: vcmp_with_opposite_cond_and_swapped_operands
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi16_]], 0, $noreg
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi32_]], 0, $noreg
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: [[MVE_VCMPi8_:%[0-9]+]]:vccr = MVE_VCMPi8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPi8_]], 0, $noreg
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.4(0x80000000)
+  ; CHECK: [[MVE_VCMPs16_:%[0-9]+]]:vccr = MVE_VCMPs16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT3:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs16_]], 0, $noreg
+  ; CHECK: bb.4:
+  ; CHECK: successors: %bb.5(0x80000000)
+  ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT4:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+  ; CHECK: bb.5:
+  ; CHECK: successors: %bb.6(0x80000000)
+  ; CHECK: [[MVE_VCMPs8_:%[0-9]+]]:vccr = MVE_VCMPs8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT5:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs8_]], 0, $noreg
+  ; CHECK: bb.6:
+  ; CHECK: successors: %bb.7(0x80000000)
+  ; CHECK: [[MVE_VCMPu16_:%[0-9]+]]:vccr = MVE_VCMPu16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT6:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu16_]], 0, $noreg
+  ; CHECK: bb.7:
+  ; CHECK: successors: %bb.8(0x80000000)
+  ; CHECK: [[MVE_VCMPu32_:%[0-9]+]]:vccr = MVE_VCMPu32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT7:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu32_]], 0, $noreg
+  ; CHECK: bb.8:
+  ; CHECK: [[MVE_VCMPu8_:%[0-9]+]]:vccr = MVE_VCMPu8 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VPNOT8:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPu8_]], 0, $noreg
+  ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+  ; Tests that an unpredicated integer VCMP is converted into a VPNOT of the
+  ; previous VCMP's result when it has the same opcode, swapped operands and
+  ; condition 12 after condition 10 (presumably GT after GE in the ARM
+  ; condition-code encoding). One basic block per integer VCMP opcode.
+  bb.0:
+    %2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.1:
+    %4:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %5:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.2:
+    %6:vccr = MVE_VCMPi8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %7:vccr = MVE_VCMPi8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.3:
+    %8:vccr = MVE_VCMPs16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %9:vccr = MVE_VCMPs16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.4:
+    %10:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %11:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.5:
+    %12:vccr = MVE_VCMPs8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %13:vccr = MVE_VCMPs8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.6:
+    %14:vccr = MVE_VCMPu16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %15:vccr = MVE_VCMPu16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.7:
+    %16:vccr = MVE_VCMPu32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %17:vccr = MVE_VCMPu32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.8:
+    %18:vccr = MVE_VCMPu8 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %19:vccr = MVE_VCMPu8 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: triple_vcmp
+alignment: 4
+body: |
+  ;
+  ; Tests that, when two identical "VPNOT-like VCMPs" follow the same VCMP,
+  ; only the first becomes a VPNOT; the second one is expected to stay a VCMP.
+  ;
+  bb.0:
+    ; CHECK-LABEL: name: triple_vcmp
+    ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+    ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+    ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 12, 0, $noreg
+    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+    %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+    %4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: killed_vccr_values
+alignment: 4
+body: |
+  bb.0:
+    ;
+    ; Tests that, if the result of the first VCMP is killed (here by the
+    ; MVE_VORR that uses it as its predicate) before the second VCMP is
+    ; reached, the kill flag is removed so the new VPNOT can read that result.
+    ;
+    ; CHECK-LABEL: name: killed_vccr_values
+    ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+    ; CHECK: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR %1:mqpr, %2:mqpr, 1, [[MVE_VCMPf16_]], undef [[MVE_VORR]]
+    ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPf16_]], 0, $noreg
+    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+    %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:mqpr = MVE_VORR %0:mqpr, %1:mqpr, 1, killed %2:vccr, undef %3:mqpr
+    %4:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: predicated_vcmps
+alignment: 4
+body: |
+  ; CHECK-LABEL: name: predicated_vcmps
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: [[MVE_VCMPi16_:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPi16_1:%[0-9]+]]:vccr = MVE_VCMPi16 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi16_]]
+  ; CHECK: [[MVE_VCMPi16_2:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_]]
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[MVE_VCMPi32_:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPi32_1:%[0-9]+]]:vccr = MVE_VCMPi32 %2:mqpr, %1:mqpr, 12, 1, [[MVE_VCMPi32_]]
+  ; CHECK: [[MVE_VCMPi32_2:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_]]
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf16_]]
+  ; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf16_]]
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.4(0x80000000)
+  ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPf32_]]
+  ; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPf32_]]
+  ; CHECK: bb.4:
+  ; CHECK: successors: %bb.5(0x80000000)
+  ; CHECK: [[MVE_VCMPi16_3:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPi16_4:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi16_3]]
+  ; CHECK: [[MVE_VCMPi16_5:%[0-9]+]]:vccr = MVE_VCMPi16 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi16_3]]
+  ; CHECK: bb.5:
+  ; CHECK: [[MVE_VCMPi32_3:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPi32_4:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 11, 1, [[MVE_VCMPi32_3]]
+  ; CHECK: [[MVE_VCMPi32_5:%[0-9]+]]:vccr = MVE_VCMPi32 %1:mqpr, %2:mqpr, 10, 1, [[MVE_VCMPi32_3]]
+  ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+  ; Tests that VCMPs predicated on the first VCMP's result are not replaced,
+  ; whether they use condition 12 with swapped operands (bb.0, bb.1),
+  ; condition 11 with the same operands (bb.2 to bb.5) or the same condition 10.
+  bb.0:
+    %2:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:vccr = MVE_VCMPi16 %1:mqpr, %0:mqpr, 12, 1, %2:vccr
+    %4:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %2:vccr
+
+  bb.1:
+    %5:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %6:vccr = MVE_VCMPi32 %1:mqpr, %0:mqpr, 12, 1, %5:vccr
+    %7:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %5:vccr
+
+  bb.2:
+    %8:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %9:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 11, 1, %8:vccr
+    %10:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 1, %8:vccr
+
+  bb.3:
+    %11:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %12:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 11, 1, %11:vccr
+    %13:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 1, %11:vccr
+
+  bb.4:
+    %14:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %15:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 11, 1, %14:vccr
+    %16:vccr = MVE_VCMPi16 %0:mqpr, %1:mqpr, 10, 1, %14:vccr
+
+  bb.5:
+    %17:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %18:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 11, 1, %17:vccr
+    %19:vccr = MVE_VCMPi32 %0:mqpr, %1:mqpr, 10, 1, %17:vccr
+
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: flt_with_swapped_operands
+alignment: 4
+body: |
+  ; CHECK-LABEL: name: flt_with_swapped_operands
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf16_1:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 12, 0, $noreg
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: [[MVE_VCMPf32_:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf32_1:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 12, 0, $noreg
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.3(0x80000000)
+  ; CHECK: [[MVE_VCMPf16_2:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf16_3:%[0-9]+]]:vccr = MVE_VCMPf16 %2:mqpr, %1:mqpr, 11, 0, $noreg
+  ; CHECK: bb.3:
+  ; CHECK: [[MVE_VCMPf32_2:%[0-9]+]]:vccr = MVE_VCMPf32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPf32_3:%[0-9]+]]:vccr = MVE_VCMPf32 %2:mqpr, %1:mqpr, 11, 0, $noreg
+  ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+  ;
+  ; Tests that float VCMPs with swapped operands are not transformed into
+  ; VPNOTs, whichever condition (12 in bb.0/bb.1, 11 in bb.2/bb.3) they use.
+  ;
+  bb.0:
+    %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.1:
+    %4:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %5:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 12, 0, $noreg
+
+  bb.2:
+    %6:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %7:vccr = MVE_VCMPf16 %1:mqpr, %0:mqpr, 11, 0, $noreg
+
+  bb.3:
+    %8:vccr = MVE_VCMPf32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %9:vccr = MVE_VCMPf32 %1:mqpr, %0:mqpr, 11, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: different_opcodes
+alignment: 4
+body: |
+  ; Tests that a "VPNOT-like VCMP" with an opcode different from the previous VCMP
+  ; is not transformed into a VPNOT. The second VCMP must stay unpredicated
+  ; ("0, $noreg"): predicated VCMPs are never replaced (see predicated_vcmps),
+  ; so a predicated one would not exercise the opcode comparison at all.
+  bb.0:
+    ; CHECK-LABEL: name: different_opcodes
+    ; CHECK: [[MVE_VCMPf16_:%[0-9]+]]:vccr = MVE_VCMPf16 %1:mqpr, %2:mqpr, 0, 0, $noreg
+    ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 1, 0, $noreg
+    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+    %2:vccr = MVE_VCMPf16 %0:mqpr, %1:mqpr, 0, 0, $noreg
+    %3:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 1, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: incorrect_condcode
+alignment: 4
+body: |
+  ; CHECK-LABEL: name: incorrect_condcode
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 11, 0, $noreg
+  ; CHECK: bb.1:
+  ; CHECK: [[MVE_VCMPs32_2:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 10, 0, $noreg
+  ; CHECK: [[MVE_VCMPs32_3:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
+  ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+  ;
+  ; Tests that a VCMP is not transformed into a VPNOT if its CondCode is not
+  ; the opposite CondCode: 11 with swapped operands (bb.0) and 12 with the
+  ; same operands (bb.1) must both leave the second VCMP untouched.
+  bb.0:
+    %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %3:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 11, 0, $noreg
+  bb.1:
+    %4:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 10, 0, $noreg
+    %5:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...
+---
+name: vpr_or_vccr_write_between_vcmps
+alignment: 4
+body: |
+  ;
+  ; Tests that a "VPNOT-like VCMP" will not be transformed into a VPNOT if
+  ; VCCR/VPR is written to in-between (here by an explicit MVE_VPNOT that
+  ; also kills the first VCMP's result).
+  bb.0:
+    ; CHECK-LABEL: name: vpr_or_vccr_write_between_vcmps
+    ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 %1:mqpr, %2:mqpr, 12, 0, $noreg
+    ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT killed [[MVE_VCMPs32_]], 0, $noreg
+    ; CHECK: [[MVE_VCMPs32_1:%[0-9]+]]:vccr = MVE_VCMPs32 %2:mqpr, %1:mqpr, 10, 0, $noreg
+    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit %1:mqpr
+    %2:vccr = MVE_VCMPs32 %0:mqpr, %1:mqpr, 12, 0, $noreg
+    %3:vccr = MVE_VPNOT killed %2:vccr, 0, $noreg
+    %4:vccr = MVE_VCMPs32 %1:mqpr, %0:mqpr, 10, 0, $noreg
+    tBX_RET 14, $noreg, implicit %0:mqpr
+...