diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -39,6 +39,7 @@
   RISCVRedundantCopyElimination.cpp
   RISCVRegisterBankInfo.cpp
   RISCVRegisterInfo.cpp
+  RISCVRVVInitUndef.cpp
   RISCVSExtWRemoval.cpp
   RISCVSubtarget.cpp
   RISCVTargetMachine.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -59,6 +59,9 @@
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVInitUndefPass();
+void initializeRISCVInitUndefPass(PassRegistry &);
+
 FunctionPass *createRISCVRedundantCopyEliminationPass();
 void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
@@ -0,0 +1,191 @@
+//===- RISCVRVVInitUndef.cpp - Initialize undef vector values to zero ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that initializes undef vector values
+// to zero so that register allocation cannot produce code that violates the
+// register-overlap constraints of vector instructions.
+//
+// Certain RISC-V vector instructions have register-overlap constraints, and
+// violating them raises an illegal-instruction trap. We model these
+// constraints with early-clobber operands, but early-clobber cannot stop the
+// register allocator from assigning the same or an overlapping register when
+// an input register holds an undef value. Converting the IMPLICIT_DEF into an
+// explicit zero initialization prevents that. This is not the ideal fix,
+// since it may emit redundant zero-initialization instructions for undef
+// values; ideally the constraint should be modeled precisely, and until it
+// is, this pass is the only way to prevent the problem.
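+//
+// A minimal sketch of the failure mode (hypothetical MIR; the register names
+// and the widening-add opcode are chosen for illustration only, not taken
+// from a test in this patch):
+//
+//   %src:vr = IMPLICIT_DEF
+//   early-clobber %dst:vrm2 = PseudoVWADD_VV_M1 %src:vr, %other:vr, ...
+//
+// Because the undef %src creates no interference, the allocator may give
+// %dst a register group that overlaps %src despite the early-clobber
+// marking, and the widening add then traps at run time. Rewriting the
+// IMPLICIT_DEF as a vmv.v.i of zero gives %src a real live range and keeps
+// the two registers apart.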
+//
+// See also: https://github.com/llvm/llvm-project/issues/50157
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-init-undef"
+#define RISCV_INIT_UNDEF_NAME "RISCV init undef pass"
+
+namespace {
+
+class RISCVInitUndef : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+public:
+  static char ID;
+
+  RISCVInitUndef() : MachineFunctionPass(ID) {
+    initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; }
+
+private:
+  bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+  bool handleImplicitDef(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &Inst);
+  bool isVectorRegClass(const Register &R);
+};
+
+} // end anonymous namespace
+
+char RISCVInitUndef::ID = 0;
+
+INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false)
+
+bool RISCVInitUndef::isVectorRegClass(const Register &R) {
+  unsigned RegClassID = MRI->getRegClass(R)->getID();
+  switch (RegClassID) {
+  case RISCV::VRRegClassID:
+  case RISCV::VRM2RegClassID:
+  case RISCV::VRM4RegClassID:
+  case RISCV::VRM8RegClassID:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator &Inst) {
+  MachineInstr &MI = *Inst;
+
+  assert(MI.getOpcode() == TargetOpcode::IMPLICIT_DEF);
+  // Undef vector registers must be explicitly defined so that the
+  // register-overlap constraint cannot be violated.
+  unsigned Reg = MI.getOperand(0).getReg();
+  // Only virtual registers are rewritten; physical-register implicit-defs
+  // are left untouched.
+  bool NeedZeroInit = false;
+
+  if (Register::isVirtualRegister(Reg)) {
+    // For virtual registers, mark all uses as undef, and request a zero
+    // initialization if any user is an INSERT_SUBREG or carries an
+    // early-clobber operand.
+    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+      MO.setIsUndef();
+      MachineInstr *UserMI = MO.getParent();
+
+      if (UserMI->getOpcode() == TargetOpcode::INSERT_SUBREG) {
+        // FIXME: Zero-initializing only the subregisters outside the
+        // inserted part should be enough.
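+        // Until then: an INSERT_SUBREG user defines only the inserted part
+        // and leaves the remaining lanes undefined, so conservatively
+        // zero-initialize the whole register.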
+        NeedZeroInit = true;
+        break;
+      }
+
+      for (MachineOperand &UseMO : UserMI->operands()) {
+        if (UseMO.isReg() && UseMO.isEarlyClobber()) {
+          NeedZeroInit = true;
+          break;
+        }
+      }
+    }
+  }
+
+  if (!NeedZeroInit)
+    return false;
+
+  LLVM_DEBUG(
+      dbgs()
+      << "Emitting vmv.v.i vd, 0 with VLMAX for implicit vector register "
+      << Reg << '\n');
+
+  unsigned Opcode;
+  unsigned RegClassID = MRI->getRegClass(Reg)->getID();
+  switch (RegClassID) {
+  case RISCV::VRRegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M1;
+    break;
+  case RISCV::VRM2RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M2;
+    break;
+  case RISCV::VRM4RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M4;
+    break;
+  case RISCV::VRM8RegClassID:
+    Opcode = RISCV::PseudoVMV_V_I_M8;
+    break;
+  default:
+    llvm_unreachable("Unexpected register class?");
+  }
+
+  BuildMI(MBB, Inst, MI.getDebugLoc(), TII->get(Opcode), Reg)
+      .addImm(0)
+      .addImm(/* AVL=VLMAX */ -1)
+      .addImm(/* Log2SEW (SEW=16) */ 4);
+
+  Inst = MBB.erase(Inst); // Remove the IMPLICIT_DEF instruction.
+
+  // We want to leave Inst pointing to the previous instruction, but what if
+  // we just erased the first instruction?
+  if (Inst == MBB.begin()) {
+    LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
+    Inst = BuildMI(MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
+  } else
+    --Inst;
+
+  return true;
+}
+
+bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) {
+  bool Changed = false;
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+    MachineInstr &MI = *I;
+    if (MI.isImplicitDef()) {
+      auto DstReg = MI.getOperand(0).getReg();
+      if (isVectorRegClass(DstReg))
+        Changed |= handleImplicitDef(MBB, I);
+    }
+  }
+  return Changed;
+}
+
+bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasVInstructions())
+    return false;
+
+  MRI = &MF.getRegInfo();
+  TII = ST.getInstrInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &BB : MF)
+    Changed |= processBasicBlock(MF, BB);
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -53,6 +53,7 @@
   initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVInitUndefPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -253,6 +254,9 @@
 void RISCVPassConfig::addPreRegAlloc() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createRISCVMergeBaseOffsetOptPass());
+
+  if (getOptimizeRegAlloc())
+    addPass(createRISCVInitUndefPass());
   addPass(createRISCVInsertVSETVLIPass());
 }
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -96,6 +96,7 @@
 ; CHECK-NEXT: Remove dead machine instructions
 ; RV64-NEXT: RISCV sext.w Removal
 ; CHECK-NEXT: RISCV Merge Base Offset
+; CHECK-NEXT: RISCV init undef pass
 ; CHECK-NEXT: RISCV Insert VSETVLI pass
 ; CHECK-NEXT: Detect Dead Lanes
 ; CHECK-NEXT: Process Implicit Definitions
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ 
b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -24,6 +24,8 @@ ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: li a0, 55 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v8, (a0), v8 ; CHECK-NEXT: csrr a0, vlenb @@ -35,9 +37,11 @@ ; CHECK-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: li s0, 36 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, s0, e16, m4, tu, mu ; CHECK-NEXT: vfwadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb @@ -47,6 +51,9 @@ ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: call func@plt ; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; CHECK-NEXT: vrgather.vv v4, v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, mu @@ -100,6 +107,8 @@ ; SUBREGLIVENESS-NEXT: slli a0, a0, 4 ; SUBREGLIVENESS-NEXT: sub sp, sp, a0 ; SUBREGLIVENESS-NEXT: li a0, 55 +; SUBREGLIVENESS-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v8, (a0), v8 ; SUBREGLIVENESS-NEXT: csrr a0, vlenb @@ -111,15 +120,20 @@ ; SUBREGLIVENESS-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: add a0, a0, a1 ; SUBREGLIVENESS-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill -; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; SUBREGLIVENESS-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; SUBREGLIVENESS-NEXT: vmclr.m v0 ; SUBREGLIVENESS-NEXT: li s0, 36 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, tu, mu ; SUBREGLIVENESS-NEXT: vfwadd.vv v8, v8, v8, v0.t ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: call func@plt ; SUBREGLIVENESS-NEXT: li a0, 32 +; SUBREGLIVENESS-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 +; SUBREGLIVENESS-NEXT: vmv.v.i v8, 0 ; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; SUBREGLIVENESS-NEXT: vrgather.vv v16, v8, v8, v0.t ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -472,6 +472,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vslidedown.vx v11, v10, a0 ; CHECK-NEXT: vslidedown.vx v8, v9, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vi v9, v11, 0 ; CHECK-NEXT: add a1, a0, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-fptrunc-vp.ll @@ -111,8 +111,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, 
ta, mu -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB7_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 @@ -122,6 +124,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -70,8 +70,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB4_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 64 @@ -81,6 +83,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu ; CHECK-NEXT: vslideup.vx v16, v8, a1 @@ -278,7 +282,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v28, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v16, (a3) +; CHECK-NEXT: vle64.v v8, (a3) ; CHECK-NEXT: addi t0, a5, -16 ; CHECK-NEXT: addi a6, a1, 512 ; CHECK-NEXT: mv a3, a2 @@ -286,10 +290,10 @@ ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a3, t0 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vle64.v v16, (a6) ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t +; CHECK-NEXT: vncvt.x.x.w v24, v8, v0.t ; CHECK-NEXT: csrr a6, vlenb ; CHECK-NEXT: slli a6, a6, 4 ; CHECK-NEXT: add a6, sp, a6 @@ -302,13 +306,13 @@ ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu ; CHECK-NEXT: li a5, 64 ; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t ; CHECK-NEXT: csrr a6, vlenb ; CHECK-NEXT: li t0, 48 ; CHECK-NEXT: mul a6, a6, t0 ; CHECK-NEXT: add a6, sp, a6 ; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a5, .LBB16_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: li a7, 64 @@ -362,7 +366,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v2, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v24, (t0) +; CHECK-NEXT: vle64.v v8, (t0) ; CHECK-NEXT: addi t0, a4, -16 ; CHECK-NEXT: addi a6, a1, 256 ; CHECK-NEXT: mv a1, a2 @@ -370,18 +374,26 @@ ; CHECK-NEXT: # %bb.19: ; CHECK-NEXT: mv a1, t0 ; CHECK-NEXT: .LBB16_20: -; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: vle64.v v24, (a6) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB16_22 ; CHECK-NEXT: # %bb.21: ; 
CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB16_22: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vncvt.x.x.w v24, v8, v0.t +; CHECK-NEXT: vncvt.x.x.w v8, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a7, a5, .LBB16_24 ; CHECK-NEXT: # %bb.23: ; CHECK-NEXT: li a7, 32 @@ -399,53 +411,55 @@ ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a4, 40 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v24, v8, 16 +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a7, a3, .LBB16_28 ; CHECK-NEXT: # %bb.27: ; CHECK-NEXT: li a7, 16 @@ -456,18 +470,20 @@ ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: 
vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v0, 0 ; CHECK-NEXT: vsetvli zero, a5, e32, m8, tu, mu ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: vse32.v v16, (a0) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a3, 40 @@ -516,8 +532,10 @@ ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: bltu a0, a1, .LBB17_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 @@ -527,6 +545,8 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -511,6 +511,8 @@ ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vslide1up.vx v9, v8, a1 ; RV32-NEXT: vslide1up.vx v10, v9, a0 +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-NEXT: vslideup.vi v8, v10, 0 ; RV32-NEXT: ret @@ -550,6 +552,8 @@ ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vslide1up.vx v9, v8, a1 ; RV32-NEXT: vslide1up.vx v10, v9, a0 +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-NEXT: vslideup.vi v8, v10, 0 ; RV32-NEXT: ret @@ -589,6 +593,8 @@ ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vslide1up.vx v9, v8, a1 ; RV32-NEXT: vslide1up.vx v10, v9, a0 +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-NEXT: vslideup.vi v8, v10, 0 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -1787,10 +1787,14 @@ ; LMULMAX4-NEXT: vncvt.x.x.w v16, v12 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v12, v16 -; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX4-NEXT: vmv.v.i v14, 0 +; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v14, v8 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v8, v14 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX4-NEXT: vmv.v.i v10, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, mu ; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX4-NEXT: vse16.v v8, (a0) @@ -1836,7 +1840,12 @@ ; LMULMAX4: # %bb.0: ; 
LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v16, v12 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX4-NEXT: vmv.v.i v12, 0 +; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX4-NEXT: vncvt.x.x.w v12, v8 +; LMULMAX4-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX4-NEXT: vmv.v.i v8, 0 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, mu ; LMULMAX4-NEXT: vslideup.vi v12, v16, 8 ; LMULMAX4-NEXT: vse32.v v12, (a0) @@ -2136,7 +2145,9 @@ ; CHECK-NEXT: vfncvt.rod.f.f.w v24, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v8, v24 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v10, v12 @@ -2152,6 +2163,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v24, v8 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v28, v16 ; CHECK-NEXT: vs8r.v v24, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll @@ -202,6 +202,8 @@ ; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vslide1up.vx v9, v8, a1 ; RV32-FP-NEXT: vslide1up.vx v10, v9, a0 +; RV32-FP-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-FP-NEXT: vslideup.vi v8, v10, 0 ; RV32-FP-NEXT: ret @@ -222,6 +224,8 @@ ; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vslide1up.vx v9, v8, a1 ; RV32-FP-NEXT: vslide1up.vx v10, v9, a0 +; RV32-FP-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-FP-NEXT: vslideup.vi v8, v10, 0 ; RV32-FP-NEXT: ret @@ -242,6 +246,8 @@ ; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vslide1up.vx v9, v8, a1 ; RV32-FP-NEXT: vslide1up.vx v10, v9, a0 +; RV32-FP-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-FP-NEXT: vmv.v.i v8, 0 ; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, tu, mu ; RV32-FP-NEXT: vslideup.vi v8, v10, 0 ; RV32-FP-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -39,6 +39,9 @@ ; RV32-V128-LABEL: interleave_v2f64: ; RV32-V128: # %bb.0: ; RV32-V128-NEXT: vmv1r.v v12, v9 +; RV32-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-V128-NEXT: vmv.v.i v10, 0 +; RV32-V128-NEXT: vmv.v.i v10, 0 ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v9, v9, 1 @@ -53,6 +56,9 @@ ; RV64-V128-LABEL: interleave_v2f64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 +; RV64-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-V128-NEXT: vmv.v.i v10, 0 +; RV64-V128-NEXT: vmv.v.i v10, 0 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -249,6 +249,8 @@ define <4 x half> @slideup_v4f16(<4 x half> %x) { ; CHECK-LABEL: slideup_v4f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu ; CHECK-NEXT: vslideup.vi v9, v8, 1 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -260,6 +262,8 @@ define <8 x float> @slideup_v8f32(<8 x float> %x) { ; CHECK-LABEL: slideup_v8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu ; CHECK-NEXT: vslideup.vi v10, v8, 3 ; CHECK-NEXT: vmv2r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll @@ -61,7 +61,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll @@ -61,7 +61,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -14,6 +14,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 0 ; CHECK-NEXT: ret @@ -27,6 +29,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: ret @@ -40,6 +44,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, mu ; CHECK-NEXT: vslideup.vi v8, v12, 6 ; CHECK-NEXT: ret @@ -53,20 +59,27 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v12, (a0) +; LMULMAX2-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX2-NEXT: vmv.v.i v16, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, 
m4, tu, mu ; LMULMAX2-NEXT: vslideup.vi v8, v12, 0 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vle32.v v12, (a1) +; LMULMAX1-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vle32.v v16, (a0) +; LMULMAX1-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v20, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v12, 0 +; LMULMAX1-NEXT: vslideup.vi v8, v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v16, 4 +; LMULMAX1-NEXT: vslideup.vi v8, v12, 4 ; LMULMAX1-NEXT: ret %sv = load <8 x i32>, <8 x i32>* %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 0) @@ -78,20 +91,27 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v12, (a0) +; LMULMAX2-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX2-NEXT: vmv.v.i v16, 0 ; LMULMAX2-NEXT: vsetivli zero, 16, e32, m4, tu, mu ; LMULMAX2-NEXT: vslideup.vi v8, v12, 8 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8: ; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vle32.v v12, (a1) +; LMULMAX1-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v16, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vle32.v v16, (a0) +; LMULMAX1-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; LMULMAX1-NEXT: vmv.v.i v20, 0 ; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v12, 8 +; LMULMAX1-NEXT: vslideup.vi v8, v16, 8 ; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, mu -; LMULMAX1-NEXT: vslideup.vi v8, v16, 12 +; LMULMAX1-NEXT: vslideup.vi v8, v12, 12 ; LMULMAX1-NEXT: ret %sv = load <8 x i32>, <8 x i32>* %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 8) @@ -103,6 +123,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: ret %sv = load <2 x i32>, <2 x i32>* %svp %v = call @llvm.vector.insert.v2i32.nxv8i32( undef, <2 x i32> %sv, i64 0) @@ -165,6 +187,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, mu @@ -196,6 +220,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, mu @@ -226,6 +252,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v10, (a0) ; 
LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, mu @@ -256,6 +284,9 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a1) +; LMULMAX2-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; LMULMAX2-NEXT: vmv.v.i v10, 0 +; LMULMAX2-NEXT: vmv.v.i v10, 0 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, mu ; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 ; LMULMAX2-NEXT: vse32.v v10, (a0) @@ -265,6 +296,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; LMULMAX1-NEXT: vle32.v v8, (a1) +; LMULMAX1-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; LMULMAX1-NEXT: vmv.v.i v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu ; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 ; LMULMAX1-NEXT: addi a0, a0, 16 @@ -513,7 +546,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vi v8, v16, 4 ; CHECK-NEXT: vs8r.v v8, (a2) @@ -531,6 +569,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vs8r.v v8, (a1) ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv @@ -544,6 +584,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, mu ; CHECK-NEXT: vslideup.vi v16, v8, 2 ; CHECK-NEXT: vs8r.v v16, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -43,13 +43,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetvli a3, zero, e16, m2, ta, mu +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: lw a3, 16(a0) ; RV32-NEXT: addi a4, a0, 20 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vlse32.v v10, (a4), zero ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; RV32-NEXT: vmv.s.x v10, a3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu +; RV32-NEXT: vsetvli a3, zero, e16, m2, ta, mu +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu ; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, mu ; RV32-NEXT: vmv.v.i v10, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -52,6 +52,9 @@ ; RV32-V128-LABEL: interleave_v2i64: ; RV32-V128: # %bb.0: ; RV32-V128-NEXT: vmv1r.v v12, v9 +; RV32-V128-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-V128-NEXT: vmv.v.i v10, 0 +; RV32-V128-NEXT: vmv.v.i v10, 0 ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v9, v9, 1 @@ -66,6 +69,9 @@ ; RV64-V128-LABEL: interleave_v2i64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 +; RV64-V128-NEXT: vsetvli a0, zero, e16, m2, 
ta, mu +; RV64-V128-NEXT: vmv.v.i v10, 0 +; RV64-V128-NEXT: vmv.v.i v10, 0 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -538,6 +538,8 @@ define <4 x i16> @slideup_v4i16(<4 x i16> %x) { ; CHECK-LABEL: slideup_v4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu ; CHECK-NEXT: vslideup.vi v9, v8, 1 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -549,6 +551,8 @@ define <8 x i32> @slideup_v8i32(<8 x i32> %x) { ; CHECK-LABEL: slideup_v8i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu ; CHECK-NEXT: vslideup.vi v10, v8, 3 ; CHECK-NEXT: vmv2r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12661,6 +12661,8 @@ ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-NEXT: vmv1r.v v12, v10 ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t +; RV64V-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; RV64V-NEXT: vmv.v.i v14, 0 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, mu ; RV64V-NEXT: vslidedown.vi v10, v10, 16 ; RV64V-NEXT: vslidedown.vi v8, v8, 16 @@ -12670,6 +12672,8 @@ ; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64V-NEXT: vmv.v.i v8, 0 ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, tu, mu ; RV64V-NEXT: vslideup.vi v12, v10, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -570,16 +570,19 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: addi a4, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vle16.v v24, (a4) +; CHECK-NEXT: vle16.v v8, (a4) ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu ; CHECK-NEXT: addi a4, a2, -64 ; CHECK-NEXT: vslidedown.vi v0, v0, 8 @@ -588,23 +591,34 @@ ; CHECK-NEXT: mv a3, a4 ; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, mu +; CHECK-NEXT: 
vmfeq.vv v2, v16, v8, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB43_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: .LBB43_4: -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vsetivli zero, 16, e8, m1, tu, mu ; CHECK-NEXT: vslideup.vi v16, v2, 8 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -617,7 +631,9 @@ define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oeq_vv_v7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -630,7 +646,9 @@ define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oeq_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -641,7 +659,9 @@ define <8 x i1> @fcmp_oeq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oeq_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -654,7 +674,9 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oeq_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -667,7 +689,9 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ogt_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -678,7 +702,9 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ogt_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: 
vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -691,7 +717,9 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ogt_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -704,7 +732,9 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oge_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -715,7 +745,9 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oge_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -728,7 +760,9 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oge_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -741,7 +775,9 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_olt_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -752,7 +788,9 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_olt_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -765,7 +803,9 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_olt_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -778,7 +818,9 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ole_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v16, 
v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -789,7 +831,9 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ole_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -802,7 +846,9 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ole_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -815,10 +861,14 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_one_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v17, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", <8 x i1> %m, i32 %evl) @@ -828,10 +878,14 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_one_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 @@ -843,10 +897,14 @@ define <8 x i1> @fcmp_one_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_one_vf_swap_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 @@ -858,10 +916,14 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; 
CHECK-NEXT: vmfeq.vv v16, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v12, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", <8 x i1> %m, i32 %evl)
@@ -873,10 +935,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT: vfmv.v.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v12, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -890,10 +956,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT: vfmv.v.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v16, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -905,10 +975,14 @@
 define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v17, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v17, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl)
@@ -918,10 +992,14 @@
 define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v13, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -933,10 +1011,14 @@
 define <8 x i1> @fcmp_ueq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v13, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -948,9 +1030,10 @@
 define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
@@ -960,9 +1043,10 @@
 define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -974,9 +1058,10 @@
 define <8 x i1> @fcmp_ugt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -988,9 +1073,10 @@
 define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
@@ -1000,9 +1086,10 @@
 define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1014,9 +1101,10 @@
 define <8 x i1> @fcmp_uge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1028,9 +1116,10 @@
 define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
@@ -1040,9 +1129,10 @@
 define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1054,9 +1144,10 @@
 define <8 x i1> @fcmp_ult_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1068,9 +1159,10 @@
 define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", <8 x i1> %m, i32 %evl)
@@ -1080,9 +1172,10 @@
 define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1094,9 +1187,10 @@
 define <8 x i1> @fcmp_ule_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1108,7 +1202,9 @@
 define <8 x i1> @fcmp_une_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1119,7 +1215,9 @@
 define <8 x i1> @fcmp_une_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1132,7 +1230,9 @@
 define <8 x i1> @fcmp_une_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_swap_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1145,10 +1245,14 @@
 define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vv_v8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v16, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v12, v16
 ; CHECK-NEXT: ret
 %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", <8 x i1> %m, i32 %evl)
@@ -1160,10 +1264,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT: vfmv.v.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v12, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1177,10 +1285,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; CHECK-NEXT: vfmv.v.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v16, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
@@ -1206,10 +1318,7 @@
 ; CHECK-NEXT: addi a1, a0, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
 ; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a3, a2, -16
 ; CHECK-NEXT: csrr a1, vlenb
@@ -1223,30 +1332,37 @@
 ; CHECK-NEXT: mv a1, a3
 ; CHECK-NEXT: .LBB87_2:
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
 ; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t
 ; CHECK-NEXT: bltu a2, a0, .LBB87_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a2, 16
 ; CHECK-NEXT: .LBB87_4:
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v2
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add a0, sp, a0
 ; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, mu
 ; CHECK-NEXT: vslideup.vi v16, v1, 2
 ; CHECK-NEXT: vmv1r.v v0, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -647,18 +647,20 @@
 ; CHECK-NEXT: add a1, sp, a1
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: mv a0, a3
 ; CHECK-NEXT: bltu a3, a4, .LBB51_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: li a0, 128
 ; CHECK-NEXT: .LBB51_2:
 ; CHECK-NEXT: li a4, 0
 ; CHECK-NEXT: vlm.v v24, (a2)
-; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
 ; CHECK-NEXT: addi a0, a3, -128
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -670,7 +672,9 @@
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: mv a4, a0
 ; CHECK-NEXT: .LBB51_4:
-; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
@@ -704,14 +708,18 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a1, a4
 ; CHECK-NEXT: .LBB52_2:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a4, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
 ; CHECK-NEXT: bltu a2, a3, .LBB52_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: .LBB52_4:
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
@@ -736,14 +744,18 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a1, a4
 ; CHECK-NEXT: .LBB53_2:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a4, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v25
 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
 ; CHECK-NEXT: bltu a2, a3, .LBB53_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: .LBB53_4:
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
@@ -760,7 +772,9 @@
 define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -771,7 +785,9 @@
 define <8 x i1> @icmp_eq_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -784,7 +800,9 @@
 define <8 x i1> @icmp_eq_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -797,7 +815,9 @@
 define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -810,7 +830,9 @@
 define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -823,7 +845,9 @@
 define <8 x i1> @icmp_ne_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsne.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -834,7 +858,9 @@
 define <8 x i1> @icmp_ne_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -847,7 +873,9 @@
 define <8 x i1> @icmp_ne_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -860,7 +888,9 @@
 define <8 x i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -873,7 +903,9 @@
 define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -886,7 +918,9 @@
 define <8 x i1> @icmp_ugt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsltu.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -897,7 +931,9 @@
 define <8 x i1> @icmp_ugt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -910,7 +946,9 @@
 define <8 x i1> @icmp_ugt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -923,7 +961,9 @@
 define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -936,7 +976,9 @@
 define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -949,7 +991,9 @@
 define <8 x i1> @icmp_uge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -962,7 +1006,9 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v10, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -975,7 +1021,9 @@
 define <8 x i1> @icmp_uge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -988,7 +1036,9 @@
 define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1001,7 +1051,9 @@
 define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1014,7 +1066,9 @@
 define <8 x i1> @icmp_ult_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsltu.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1025,7 +1079,9 @@
 define <8 x i1> @icmp_ult_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1038,7 +1094,9 @@
 define <8 x i1> @icmp_ult_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1051,7 +1109,9 @@
 define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1064,7 +1124,9 @@
 define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1077,7 +1139,9 @@
 define <8 x i1> @icmp_sgt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmslt.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1088,7 +1152,9 @@
 define <8 x i1> @icmp_sgt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1101,7 +1167,9 @@
 define <8 x i1> @icmp_sgt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1114,7 +1182,9 @@
 define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1127,7 +1197,9 @@
 define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1140,7 +1212,9 @@
 define <8 x i1> @icmp_sge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1153,7 +1227,9 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1166,7 +1242,9 @@
 define <8 x i1> @icmp_sge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1179,7 +1257,9 @@
 define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1192,7 +1272,9 @@
 define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1205,7 +1287,9 @@
 define <8 x i1> @icmp_slt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmslt.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1216,7 +1300,9 @@
 define <8 x i1> @icmp_slt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1229,7 +1315,9 @@
 define <8 x i1> @icmp_slt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vx_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1242,7 +1330,9 @@
 define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1255,7 +1345,9 @@
 define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1268,7 +1360,9 @@
 define <8 x i1> @icmp_sle_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vv_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1279,7 +1373,9 @@
 define <8 x i1> @icmp_sle_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vx_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1294,7 +1390,9 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1307,7 +1405,9 @@
 define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vi_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1320,7 +1420,9 @@
 define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1338,16 +1440,19 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: vmv1r.v v1, v0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a3, 0
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: addi a4, a0, 128
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT: vle32.v v24, (a4)
+; CHECK-NEXT: vle32.v v8, (a4)
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
 ; CHECK-NEXT: addi a4, a2, -32
 ; CHECK-NEXT: vslidedown.vi v0, v0, 4
@@ -1356,23 +1461,34 @@
 ; CHECK-NEXT: mv a3, a4
 ; CHECK-NEXT: .LBB99_2:
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t
+; CHECK-NEXT: vle32.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vmseq.vv v2, v16, v8, v0.t
 ; CHECK-NEXT: bltu a2, a1, .LBB99_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: .LBB99_4:
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
 ; CHECK-NEXT: vslideup.vi v16, v2, 4
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -1392,14 +1508,18 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a2, a3
 ; CHECK-NEXT: .LBB100_2:
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
 ; CHECK-NEXT: bltu a1, a2, .LBB100_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: .LBB100_4:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
@@ -1424,14 +1544,18 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a2, a3
 ; CHECK-NEXT: .LBB101_2:
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
 ; CHECK-NEXT: bltu a1, a2, .LBB101_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: .LBB101_4:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
@@ -1449,7 +1573,9 @@
 define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1467,7 +1593,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmseq.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1475,7 +1603,9 @@
 ;
 ; RV64-LABEL: icmp_eq_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1495,7 +1625,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmseq.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1503,7 +1635,9 @@
 ;
 ; RV64-LABEL: icmp_eq_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1516,7 +1650,9 @@
 define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1529,7 +1665,9 @@
 define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1542,7 +1680,9 @@
 define <8 x i1> @icmp_ne_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1560,7 +1700,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsne.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1568,7 +1710,9 @@
 ;
 ; RV64-LABEL: icmp_ne_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1588,7 +1732,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsne.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1596,7 +1742,9 @@
 ;
 ; RV64-LABEL: icmp_ne_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1609,7 +1757,9 @@
 define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1622,7 +1772,9 @@
 define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ne_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1635,7 +1787,9 @@
 define <8 x i1> @icmp_ugt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1653,7 +1807,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1661,7 +1817,9 @@
 ;
 ; RV64-LABEL: icmp_ugt_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1681,7 +1839,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1689,7 +1849,9 @@
 ;
 ; RV64-LABEL: icmp_ugt_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1702,7 +1864,9 @@
 define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1715,7 +1879,9 @@
 define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ugt_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1728,7 +1894,9 @@
 define <8 x i1> @icmp_uge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1746,7 +1914,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsleu.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1756,7 +1926,9 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsleu.vv v12, v16, v8, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1776,7 +1948,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsleu.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1784,7 +1958,9 @@
 ;
 ; RV64-LABEL: icmp_uge_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsleu.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1797,7 +1973,9 @@
 define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1810,7 +1988,9 @@
 define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_uge_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1823,7 +2003,9 @@
 define <8 x i1> @icmp_ult_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1841,7 +2023,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1849,7 +2033,9 @@
 ;
 ; RV64-LABEL: icmp_ult_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1869,7 +2055,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1877,7 +2065,9 @@
 ;
 ; RV64-LABEL: icmp_ult_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1890,7 +2080,9 @@
 define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1903,7 +2095,9 @@
 define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_ult_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1916,7 +2110,9 @@
 define <8 x i1> @icmp_sgt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1934,7 +2130,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1942,7 +2140,9 @@
 ;
 ; RV64-LABEL: icmp_sgt_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1962,7 +2162,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -1970,7 +2172,9 @@
 ;
 ; RV64-LABEL: icmp_sgt_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -1983,7 +2187,9 @@
 define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1996,7 +2202,9 @@
 define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2009,7 +2217,9 @@
 define <8 x i1> @icmp_sge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -2027,7 +2237,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2037,7 +2249,9 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2057,7 +2271,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2065,7 +2281,9 @@
 ;
 ; RV64-LABEL: icmp_sge_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2078,7 +2296,9 @@
 define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2091,7 +2311,9 @@
 define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sge_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2104,7 +2326,9 @@
 define <8 x i1> @icmp_slt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -2122,7 +2346,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2130,7 +2356,9 @@
 ;
 ; RV64-LABEL: icmp_slt_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2150,7 +2378,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2158,7 +2388,9 @@
 ;
 ; RV64-LABEL: icmp_slt_vx_swap_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2171,7 +2403,9 @@
 define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2184,7 +2418,9 @@
 define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_slt_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2197,7 +2433,9 @@
 define <8 x i1> @icmp_sle_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vv_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -2215,7 +2453,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2223,7 +2463,9 @@
 ;
 ; RV64-LABEL: icmp_sle_vx_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2243,7 +2485,9 @@
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v12
 ; RV32-NEXT: addi sp, sp, 16
@@ -2253,7 +2497,9 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: ret
@@ -2266,7 +2512,9 @@
 define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vi_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -2279,7 +2527,9 @@
 define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sle_vi_swap_v8i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -300,7 +300,9 @@
 ; RV64-NEXT: vslidedown.vi v0, v10, 2
 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu
 ; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, mu
 ; RV64-NEXT: li a2, 16
+; RV64-NEXT: vmv.v.i v14, 0
 ; RV64-NEXT: bltu a1, a2, .LBB13_4
 ; RV64-NEXT: # %bb.3:
 ; RV64-NEXT: li a1, 16
@@ -310,6 +312,8 @@
 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; RV64-NEXT: vmv1r.v v0, v10
 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; RV64-NEXT: vmv.v.i v10, 0
 ; RV64-NEXT: li a0, 32
 ; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu
 ; RV64-NEXT: vslideup.vi v8, v12, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -367,6 +367,8 @@
 define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_0(<vscale x 1 x half> %subvec) {
 ; CHECK-LABEL: insert_nxv32f16_undef_nxv1f16_0:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> undef, <vscale x 1 x half> %subvec, i64 0)
 ret <vscale x 32 x half> %v
@@ -379,8 +381,12 @@
 ; CHECK-NEXT: srli a1, a0, 3
 ; CHECK-NEXT: srli a0, a0, 2
 ; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT: vslideup.vx v14, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> undef, <vscale x 1 x half> %subvec, i64 26)
 ret <vscale x 32 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -2213,6 +2213,8 @@
 ; RV64-NEXT: vsext.vf8 v16, v8
 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vmv.v.i v12, 0
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: srli a1, a1, 3
 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
@@ -2237,6 +2239,8 @@
 ; RV32-NEXT: vsext.vf4 v16, v8
 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu
 ; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
+; RV32-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: srli a1, a1, 2
 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
@@ -2255,6 +2259,8 @@
 ; RV64-NEXT: vsext.vf8 v24, v8
 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t
+; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, mu
+; RV64-NEXT: vmv.v.i v20, 0
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: srli a2, a1, 3
 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -502,15 +502,17 @@
 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu
 ; RV32-BITS-UNKNOWN-NEXT: vid.v v8
-; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
-; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v24, v8, 1, v0
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8
-; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v24, v16
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v28, v16
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
 ; RV32-BITS-UNKNOWN-NEXT: ret
 ;
@@ -536,15 +538,17 @@
 ; RV32-BITS-512-NEXT: addi a0, a0, -1
 ; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu
 ; RV32-BITS-512-NEXT: vid.v v8
-; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vrsub.vx v16, v8, a0
 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV32-BITS-512-NEXT: vmv.v.i v16, 0
-; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v24, v8, 1, v0
 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
-; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8
-; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8
+; RV32-BITS-512-NEXT: vrgather.vv v12, v24, v16
+; RV32-BITS-512-NEXT: vrgather.vv v8, v28, v16
+; RV32-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v16, 0
 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV32-BITS-512-NEXT: vand.vi v8, v24, 1
+; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
 ; RV32-BITS-512-NEXT: ret
 ;
@@ -555,15 +559,17 @@
 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu
 ; RV64-BITS-UNKNOWN-NEXT: vid.v v8
-; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v8, a0
 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
-; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v24, v8, 1, v0
 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8
-; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v24, v16
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v28, v16
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
 ; RV64-BITS-UNKNOWN-NEXT: ret
 ;
@@ -589,15 +595,17 @@
 ; RV64-BITS-512-NEXT: addi a0, a0, -1
 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu
 ; RV64-BITS-512-NEXT: vid.v v8
-; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vrsub.vx v16, v8, a0
 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV64-BITS-512-NEXT: vmv.v.i v16, 0
-; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v24, v8, 1, v0
 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
-; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8
-; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8
+; RV64-BITS-512-NEXT: vrgather.vv v12, v24, v16
+; RV64-BITS-512-NEXT: vrgather.vv v8, v28, v16
+; RV64-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v16, 0
 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
-; RV64-BITS-512-NEXT: vand.vi v8, v24, 1
+; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
 ; RV64-BITS-512-NEXT: ret
 %res = call <vscale x 64 x i1> @llvm.experimental.vector.reverse.nxv64i1(<vscale x 64 x i1> %a)
@@ -1082,7 +1090,9 @@
 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu
 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24
 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24
-; RV32-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16
 ; RV32-BITS-UNKNOWN-NEXT: ret
 ;
 ; RV32-BITS-256-LABEL: reverse_nxv64i8:
@@ -1107,6 +1117,8 @@
 ; RV32-BITS-512-NEXT: vrsub.vx v24, v16, a0
 ; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v24
 ; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v24
+; RV32-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
 ; RV32-BITS-512-NEXT: vmv8r.v v8, v16
 ; RV32-BITS-512-NEXT: ret
 ;
@@ -1121,7 +1133,9 @@
 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu
 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24
 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24
-; RV64-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16
 ; RV64-BITS-UNKNOWN-NEXT: ret
 ;
 ; RV64-BITS-256-LABEL: reverse_nxv64i8:
@@ -1146,6 +1160,8 @@
 ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0
 ; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v24
 ; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v24
+; RV64-BITS-512-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
 ; RV64-BITS-512-NEXT: vmv8r.v v8, v16
 ; RV64-BITS-512-NEXT: ret
 %res = call <vscale x 64 x i8> @llvm.experimental.vector.reverse.nxv64i8(<vscale x 64 x i8> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -567,7 +567,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -578,7 +580,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -591,7 +595,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -604,7 +610,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -615,7 +623,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -628,7 +638,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -641,7 +653,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -652,7 +666,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -665,7 +681,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -678,7 +696,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -689,7 +709,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -702,7 +724,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -715,7 +739,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -726,7 +752,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -739,7 +767,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -752,10 +782,14 @@
 define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v13, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
@@ -765,10 +799,14 @@
 define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v11, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -780,10 +818,14 @@
 define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v11, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -795,10 +837,14 @@
 define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v12, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v10, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
@@ -810,10 +856,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v10, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -827,10 +877,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v12, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -842,10 +896,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v13, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v13, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
@@ -855,10 +913,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v11, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -870,10 +932,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v11, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -885,9 +951,10 @@
 define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
@@ -897,9 +964,10 @@
 define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -911,9 +979,10 @@
 define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -925,9 +994,10 @@
 define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
@@ -937,9 +1007,10 @@
 define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -951,9 +1022,10 @@
 define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -965,9 +1037,10 @@
 define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
@@ -977,9 +1050,10 @@
 define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -991,9 +1065,10 @@
 define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1005,9 +1080,10 @@
 define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
@@ -1017,9 +1093,10 @@
 define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1031,9 +1108,10 @@
 define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1045,7 +1123,9 @@
 define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: ret
@@ -1056,7 +1136,9 @@
 define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1069,7 +1151,9 @@
 define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
@@ -1082,10 +1166,14 @@
 define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vv_nxv8f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v12, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v10, v12
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
@@ -1097,10 +1185,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v10, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1114,10 +1206,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
 ; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v12, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1154,13 +1250,17 @@
 ; CHECK-NEXT: mv a4, a5
 ; CHECK-NEXT: .LBB85_2:
 ; CHECK-NEXT: vl8re16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v2, 0
+; CHECK-NEXT: vsetvli zero, a4, e16, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v2, v16, v24, v0.t
 ; CHECK-NEXT: bltu a2, a3, .LBB85_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: mv a2, a3
 ; CHECK-NEXT: .LBB85_4:
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v1
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
@@ -1729,7 +1829,9 @@
 define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vv_nxv3f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1742,7 +1844,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: ret
@@ -1753,7 +1857,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1766,7 +1872,9 @@
 define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1779,7 +1887,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: ret
@@ -1790,7 +1900,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1803,7 +1915,9 @@
 define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1816,7 +1930,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: ret
@@ -1827,7 +1943,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1840,7 +1958,9 @@
 define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1853,7 +1973,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: ret
@@ -1864,7 +1986,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1877,7 +2001,9 @@
 define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1890,7 +2016,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: ret
@@ -1901,7 +2029,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1914,7 +2044,9 @@
 define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v16
 ; CHECK-NEXT: ret
@@ -1927,10 +2059,14 @@
 define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v25, v24
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
@@ -1940,10 +2076,14 @@
 define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v17, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v17, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -1955,10 +2095,14 @@
 define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v17, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmor.mm v0, v17, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -1970,10 +2114,14 @@
 define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v24, v16, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v16, v24
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
@@ -1985,10 +2133,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v16, v24
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -2002,10 +2154,14 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmand.mm v0, v24, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -2017,10 +2173,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v25, v24
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
@@ -2030,10 +2190,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v17, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v17, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -2045,10 +2209,14 @@
 define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v17, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnor.mm v0, v17, v16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
@@ -2060,9 +2228,10 @@
 define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v24
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
@@ -2072,9 +2241,10 @@
 define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
 ; CHECK-NEXT: vmnot.m v0, v16
 ; CHECK-NEXT: ret
%elt.head = insertelement poison, double %b, i32 0 @@ -2086,9 +2256,10 @@ define @fcmp_ugt_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2100,9 +2271,10 @@ define @fcmp_uge_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"uge", %m, i32 %evl) @@ -2112,9 +2284,10 @@ define @fcmp_uge_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uge_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2126,9 +2299,10 @@ define @fcmp_uge_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2140,9 +2314,10 @@ define @fcmp_ult_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ult", %m, i32 %evl) @@ -2152,9 +2327,10 @@ define @fcmp_ult_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ult_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2166,9 +2342,10 @@ define @fcmp_ult_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfle.vf v16, v8, 
fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2180,9 +2357,10 @@ define @fcmp_ule_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ule", %m, i32 %evl) @@ -2192,9 +2370,10 @@ define @fcmp_ule_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ule_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2206,9 +2385,10 @@ define @fcmp_ule_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2220,7 +2400,9 @@ define @fcmp_une_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_une_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: ret @@ -2231,7 +2413,9 @@ define @fcmp_une_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_une_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -2244,7 +2428,9 @@ define @fcmp_une_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -2257,10 +2443,14 @@ define @fcmp_uno_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v24, v16, v16, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; 
CHECK-NEXT: vmor.mm v0, v16, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"uno", %m, i32 %evl) @@ -2272,10 +2462,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmor.mm v0, v16, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2289,10 +2483,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; CHECK-NEXT: vmor.mm v0, v24, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -2346,14 +2544,16 @@ ; CHECK-NEXT: li t2, 24 ; CHECK-NEXT: vsetvli t3, zero, e8, mf2, ta, mu ; CHECK-NEXT: vslidedown.vx v1, v24, a4 -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: csrr t3, vlenb -; CHECK-NEXT: slli t3, t3, 3 -; CHECK-NEXT: add t3, sp, t3 -; CHECK-NEXT: addi t3, t3, 16 -; CHECK-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill ; CHECK-NEXT: slli t3, a3, 4 -; CHECK-NEXT: vsetvli zero, t1, e64, m8, ta, ma +; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: csrr t4, vlenb +; CHECK-NEXT: slli t4, t4, 3 +; CHECK-NEXT: add t4, sp, t4 +; CHECK-NEXT: addi t4, t4, 16 +; CHECK-NEXT: vs8r.v v8, (t4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli zero, t1, e64, m8, ta, mu ; CHECK-NEXT: csrr t1, vlenb ; CHECK-NEXT: slli t1, t1, 4 ; CHECK-NEXT: add t1, sp, t1 @@ -2367,7 +2567,9 @@ ; CHECK-NEXT: li t1, 0 ; CHECK-NEXT: mul t4, a3, t2 ; CHECK-NEXT: add t2, a2, t3 -; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli t3, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: li t3, 24 @@ -2421,7 +2623,9 @@ ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 @@ -2443,7 +2647,9 @@ ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB171_12: -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1190,7 +1190,9 @@ ; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -1202,7 +1204,9 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_4: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 @@ -1237,14 +1241,18 @@ ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, mu ; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu ; CHECK-NEXT: sub a1, a2, a3 ; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB97_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a5, a1 ; CHECK-NEXT: .LBB97_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 @@ -1268,14 +1276,18 @@ ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, mu ; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu ; CHECK-NEXT: sub a1, a2, a3 ; CHECK-NEXT: vmseq.vx v25, v8, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB98_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a5, a1 ; CHECK-NEXT: .LBB98_4: -; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 @@ -1821,7 +1833,9 @@ define @icmp_eq_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vv_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -1832,7 +1846,9 @@ define @icmp_eq_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -1845,7 +1861,9 @@ define @icmp_eq_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t ; CHECK-NEXT: 
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1858,7 +1876,9 @@
define @icmp_eq_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1871,7 +1891,9 @@
define @icmp_eq_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1884,7 +1906,9 @@
define @icmp_ne_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -1895,7 +1919,9 @@
define @icmp_ne_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1908,7 +1934,9 @@
define @icmp_ne_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1921,7 +1949,9 @@
define @icmp_ne_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1934,7 +1964,9 @@
define @icmp_ne_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1947,7 +1979,9 @@
define @icmp_ugt_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -1958,7 +1992,9 @@
define @icmp_ugt_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1971,7 +2007,9 @@
define @icmp_ugt_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1984,7 +2022,9 @@
define @icmp_ugt_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -1997,7 +2037,9 @@
define @icmp_ugt_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2010,7 +2052,9 @@
define @icmp_uge_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2023,7 +2067,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2036,7 +2082,9 @@
define @icmp_uge_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2049,7 +2097,9 @@
define @icmp_uge_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2062,7 +2112,9 @@
define @icmp_uge_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2075,7 +2127,9 @@
define @icmp_ult_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2086,7 +2140,9 @@
define @icmp_ult_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2099,7 +2155,9 @@
define @icmp_ult_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2112,7 +2170,9 @@
define @icmp_ult_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2125,7 +2185,9 @@
define @icmp_ult_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2138,7 +2200,9 @@
define @icmp_sgt_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2149,7 +2213,9 @@
define @icmp_sgt_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2162,7 +2228,9 @@
define @icmp_sgt_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2175,7 +2243,9 @@
define @icmp_sgt_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2188,7 +2258,9 @@
define @icmp_sgt_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2201,7 +2273,9 @@
define @icmp_sge_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2214,7 +2288,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2227,7 +2303,9 @@
define @icmp_sge_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2240,7 +2318,9 @@
define @icmp_sge_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2253,7 +2333,9 @@
define @icmp_sge_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2266,7 +2348,9 @@
define @icmp_slt_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2277,7 +2361,9 @@
define @icmp_slt_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2290,7 +2376,9 @@
define @icmp_slt_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vx_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2303,7 +2391,9 @@
define @icmp_slt_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2316,7 +2406,9 @@
define @icmp_slt_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2329,7 +2421,9 @@
define @icmp_sle_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2340,7 +2434,9 @@
define @icmp_sle_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vx_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2355,7 +2451,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2368,7 +2466,9 @@
define @icmp_sle_vi_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vi_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2381,7 +2481,9 @@
define @icmp_sle_vi_swap_nxv8i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vi_swap_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
@@ -2419,13 +2521,17 @@
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: .LBB189_2:
; CHECK-NEXT: vl8re32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v2, 0
+; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, mu
; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t
; CHECK-NEXT: bltu a2, a3, .LBB189_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB189_4:
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
@@ -2458,13 +2564,17 @@
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: .LBB190_2:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, mu
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a1, a3, .LBB190_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB190_4:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
@@ -2493,13 +2603,17 @@
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: .LBB191_2:
-; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, mu
; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t
; CHECK-NEXT: bltu a1, a3, .LBB191_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB191_4:
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
@@ -3300,7 +3414,9 @@
define @icmp_eq_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmseq.vv v24, v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3318,7 +3434,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmseq.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3326,7 +3444,9 @@
;
; RV64-LABEL: icmp_eq_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3346,7 +3466,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmseq.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3354,7 +3476,9 @@
;
; RV64-LABEL: icmp_eq_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3367,7 +3491,9 @@
define @icmp_eq_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3380,7 +3506,9 @@
define @icmp_eq_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3393,7 +3521,9 @@
define @icmp_ne_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsne.vv v24, v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3411,7 +3541,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsne.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3419,7 +3551,9 @@
;
; RV64-LABEL: icmp_ne_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3439,7 +3573,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsne.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3447,7 +3583,9 @@
;
; RV64-LABEL: icmp_ne_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3460,7 +3598,9 @@
define @icmp_ne_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3473,7 +3613,9 @@
define @icmp_ne_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ne_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3486,7 +3628,9 @@
define @icmp_ugt_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsltu.vv v24, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3504,7 +3648,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3512,7 +3658,9 @@
;
; RV64-LABEL: icmp_ugt_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3532,7 +3680,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3540,7 +3690,9 @@
;
; RV64-LABEL: icmp_ugt_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3553,7 +3705,9 @@
define @icmp_ugt_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3566,7 +3720,9 @@
define @icmp_ugt_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3579,7 +3735,9 @@
define @icmp_uge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsleu.vv v24, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3597,7 +3755,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsleu.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3607,7 +3767,9 @@
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v24, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsleu.vv v16, v24, v8, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3627,7 +3789,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsleu.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3635,7 +3799,9 @@
;
; RV64-LABEL: icmp_uge_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsleu.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3648,7 +3814,9 @@
define @icmp_uge_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgtu.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3661,7 +3829,9 @@
define @icmp_uge_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_uge_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsleu.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3674,7 +3844,9 @@
define @icmp_ult_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsltu.vv v24, v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3692,7 +3864,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3700,7 +3874,9 @@
;
; RV64-LABEL: icmp_ult_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3720,7 +3896,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3728,7 +3906,9 @@
;
; RV64-LABEL: icmp_ult_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3741,7 +3921,9 @@
define @icmp_ult_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3754,7 +3936,9 @@
define @icmp_ult_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_ult_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3767,7 +3951,9 @@
define @icmp_sgt_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmslt.vv v24, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3785,7 +3971,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3793,7 +3981,9 @@
;
; RV64-LABEL: icmp_sgt_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3813,7 +4003,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3821,7 +4013,9 @@
;
; RV64-LABEL: icmp_sgt_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3834,7 +4028,9 @@
define @icmp_sgt_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3847,7 +4043,9 @@
define @icmp_sgt_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3860,7 +4058,9 @@
define @icmp_sge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vv v24, v16, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3878,7 +4078,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3888,7 +4090,9 @@
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v24, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3908,7 +4112,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3916,7 +4122,9 @@
;
; RV64-LABEL: icmp_sge_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -3929,7 +4137,9 @@
define @icmp_sge_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3942,7 +4152,9 @@
define @icmp_sge_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sge_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -3955,7 +4167,9 @@
define @icmp_slt_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmslt.vv v24, v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -3973,7 +4187,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -3981,7 +4197,9 @@
;
; RV64-LABEL: icmp_slt_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -4001,7 +4219,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -4009,7 +4229,9 @@
;
; RV64-LABEL: icmp_slt_vx_swap_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -4022,7 +4244,9 @@
define @icmp_slt_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -4035,7 +4259,9 @@
define @icmp_slt_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_slt_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -4048,7 +4274,9 @@
define @icmp_sle_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vv v24, v8, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: ret
@@ -4066,7 +4294,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -4074,7 +4304,9 @@
;
; RV64-LABEL: icmp_sle_vx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -4094,7 +4326,9 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t
; RV32-NEXT: vmv1r.v v0, v16
; RV32-NEXT: addi sp, sp, 16
@@ -4104,7 +4338,9 @@
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v24, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t
; RV64-NEXT: vmv1r.v v0, v16
; RV64-NEXT: ret
@@ -4117,7 +4353,9 @@
define @icmp_sle_vi_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vi_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -4130,7 +4368,9 @@
define @icmp_sle_vi_swap_nxv8i64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_sle_vi_swap_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
--- a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
@@ -957,7 +957,9 @@
define @intrinsic_vcompress_um_nxv1i8_nxv1i8( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcompress_um_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
; CHECK-NEXT: vcompress.vm v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
@@ -59,7 +59,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll
@@ -59,7 +59,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
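[Editor's note: the fptosi/fptoui mask tests above, and the trunc-to-mask test further down, are a second variant of the pattern: the masked conversion result is compared against zero with `vmsne.vi`, and the compare's previously-undef destination now gets an explicit zero first. A reduced sketch of such a test — the function name and element count are illustrative, assuming the @llvm.vp.fptosi intrinsic, with CHECK lines mirroring the vfptosi hunk above — could be:

; Illustrative reduced test; not copied verbatim from vfptosi-vp-mask.ll.
define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i1> %v
}
declare <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
]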
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -114,6 +114,8 @@
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl)
@@ -131,10 +133,7 @@
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a4, a1, 1
@@ -162,31 +161,30 @@
; CHECK-NEXT: .LBB8_6:
; CHECK-NEXT: li a6, 0
; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vx v1, v24, a7
+; CHECK-NEXT: vslidedown.vx v25, v24, a7
; CHECK-NEXT: add a7, a0, t0
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu
-; CHECK-NEXT: sub a4, a2, a4
; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
; CHECK-NEXT: csrr a5, vlenb
; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t
+; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a5, zero, e16, m8, ta, mu
+; CHECK-NEXT: sub a4, a2, a4
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: bltu a2, a4, .LBB8_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mv a6, a4
; CHECK-NEXT: .LBB8_8:
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vl8re64.v v16, (a7)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (a7)
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: sub a4, a6, a1
-; CHECK-NEXT: vslidedown.vx v0, v1, a3
+; CHECK-NEXT: vslidedown.vx v0, v25, a3
; CHECK-NEXT: bltu a6, a4, .LBB8_10
; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: mv a2, a4
@@ -195,21 +193,23 @@
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t
; CHECK-NEXT: bltu a6, a1, .LBB8_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: mv a6, a1
; CHECK-NEXT: .LBB8_12:
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -281,6 +281,8 @@
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
@@ -288,7 +290,9 @@
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a5, a3, 1
; RV64-NEXT: sub a6, a1, a5
-; RV64-NEXT: vmv1r.v v12, v0
+; RV64-NEXT: vmv1r.v v13, v0
; RV64-NEXT: li a4, 0
; RV64-NEXT: li a2, 0
; RV64-NEXT: bltu a1, a6, .LBB12_2
@@ -303,10 +305,10 @@
; RV64-NEXT: .LBB12_4:
; RV64-NEXT: srli a6, a3, 2
; RV64-NEXT: vsetvli t0, zero, e8, mf2, ta, mu
-; RV64-NEXT: vslidedown.vx v13, v12, a6
+; RV64-NEXT: vslidedown.vx v12, v13, a6
; RV64-NEXT: srli a6, a3, 3
; RV64-NEXT: vsetvli t0, zero, e8, mf4, ta, mu
-; RV64-NEXT: vslidedown.vx v0, v13, a6
+; RV64-NEXT: vslidedown.vx v0, v12, a6
; RV64-NEXT: vsetvli t0, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v16, v11
; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, mu
@@ -321,7 +323,7 @@
; RV64-NEXT: mv a4, a5
; RV64-NEXT: .LBB12_8:
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
-; RV64-NEXT: vslidedown.vx v0, v12, a6
+; RV64-NEXT: vslidedown.vx v0, v13, a6
; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v16, v9
; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, mu
@@ -333,8 +335,10 @@
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu
-; RV64-NEXT: vmv1r.v v0, v12
+; RV64-NEXT: vmv1r.v v0, v13
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, mu
+; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: bltu a2, a3, .LBB12_12
; RV64-NEXT: # %bb.11:
; RV64-NEXT: mv a2, a3
@@ -342,7 +346,7 @@
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu
-; RV64-NEXT: vmv1r.v v0, v13
+; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, %idxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -59,7 +59,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vand.vi v10, v8, 1, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -181,6 +181,8 @@
; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
%v = call @llvm.vp.trunc.nxv15i16.nxv15i64( %a, %m, i32 %vl)
@@ -241,6 +243,8 @@
; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
%v = call @llvm.vp.trunc.nxv32i7.nxv32i32( %a, %m, i32 %vl)
@@ -277,6 +281,8 @@
; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
%v = call @llvm.vp.trunc.nxv32i8.nxv32i32( %a, %m, i32 %vl)
@@ -294,10 +300,7 @@
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a4, a1, 1
@@ -325,31 +328,30 @@
; CHECK-NEXT: .LBB17_6:
; CHECK-NEXT: li a6, 0
; CHECK-NEXT: vsetvli t1, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vx v1, v24, a7
+; CHECK-NEXT: vslidedown.vx v25, v24, a7
; CHECK-NEXT: add a7, a0, t0
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, mu
-; CHECK-NEXT: sub a4, a2, a4
; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t
; CHECK-NEXT: csrr a5, vlenb
; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8re8.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t
+; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a5, zero, e16, m8, ta, mu
+; CHECK-NEXT: sub a4, a2, a4
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: bltu a2, a4, .LBB17_8
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: mv a6, a4
; CHECK-NEXT: .LBB17_8:
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vl8re64.v v16, (a7)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (a7)
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: sub a4, a6, a1
-; CHECK-NEXT: vslidedown.vx v0, v1, a3
+; CHECK-NEXT: vslidedown.vx v0, v25, a3
; CHECK-NEXT: bltu a6, a4, .LBB17_10
; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: mv a2, a4
@@ -358,21 +360,23 @@
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vncvt.x.x.w v20, v24, v0.t
+; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t
; CHECK-NEXT: bltu a6, a1, .LBB17_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: mv a6, a1
; CHECK-NEXT: .LBB17_12:
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, mu
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vncvt.x.x.w v16, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
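[Editor's note: the vtrunc-vp-mask.ll hunk above follows the same shape as the fptosi/fptoui mask tests, with `vand.vi ..., 1` feeding the `vmsne.vi`. A reduced sketch — illustrative names, assuming the @llvm.vp.trunc intrinsic, with CHECK lines mirroring that hunk — would be:

; Illustrative reduced trunc-to-mask test; not copied verbatim from
; vtrunc-vp-mask.ll.
define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vand.vi v10, v8, 1, v0.t
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x i1> %v
}
declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
]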