Index: llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -296,6 +296,55 @@ return Log2SEW == 0; } +/// Returns pair of {is-tail-agnostic, is-mask-agnostic). +std::pair getPolicyFlags(const MachineInstr &MI, + const MachineRegisterInfo *MRI = nullptr) { + const unsigned TSFlags = MI.getDesc().TSFlags; + bool TailAgnostic, MaskAgnostic; + unsigned UseOpIdx; + if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + // Start with undisturbed. + TailAgnostic = false; + MaskAgnostic = false; + + // If there is a policy operand, use it. + if (RISCVII::hasVecPolicyOp(TSFlags)) { + const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); + uint64_t Policy = Op.getImm(); + assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && + "Invalid Policy Value"); + TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; + MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; + } + + // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and + // MaskAgnostic. + const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (MRI) { + MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg()); + if (UseMI && UseMI->isImplicitDef()) { + TailAgnostic = true; + MaskAgnostic = true; + } + } + // Some pseudo instructions force a tail agnostic policy despite having a + // tied def. + if (RISCVII::doesForceTailAgnostic(TSFlags)) + TailAgnostic = true; + + if (!RISCVII::usesMaskPolicy(TSFlags)) + MaskAgnostic = true; + } else { + // If there is no tied operand,, there shouldn't be a policy operand. + assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand"); + // No tied operand use agnostic policies. + TailAgnostic = true; + MaskAgnostic = true; + } + + return {TailAgnostic, MaskAgnostic}; +} + /// Which subfields of VL or VTYPE have values we need to preserve? struct DemandedFields { bool VL = false; @@ -376,6 +425,12 @@ // Behavior is independent of mask policy. if (!RISCVII::usesMaskPolicy(TSFlags)) Res.MaskPolicy = false; + + auto [TA, MA] = getPolicyFlags(MI); + if (TA) + Res.TailPolicy = false; + if (MA) + Res.MaskPolicy = false; } // Loads and stores with implicit EEW do not demand SEW or LMUL directly. @@ -768,46 +823,7 @@ const MachineRegisterInfo *MRI) { VSETVLIInfo InstrInfo; - bool TailAgnostic, MaskAgnostic; - unsigned UseOpIdx; - if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) { - // Start with undisturbed. - TailAgnostic = false; - MaskAgnostic = false; - - // If there is a policy operand, use it. - if (RISCVII::hasVecPolicyOp(TSFlags)) { - const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); - uint64_t Policy = Op.getImm(); - assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && - "Invalid Policy Value"); - TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; - MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; - } - - // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and - // MaskAgnostic. - const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg()); - if (UseMI && UseMI->isImplicitDef()) { - TailAgnostic = true; - MaskAgnostic = true; - } - // Some pseudo instructions force a tail agnostic policy despite having a - // tied def. - if (RISCVII::doesForceTailAgnostic(TSFlags)) - TailAgnostic = true; - - if (!RISCVII::usesMaskPolicy(TSFlags)) - MaskAgnostic = true; - } else { - // If there is no tied operand,, there shouldn't be a policy operand. - assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand"); - // No tied operand use agnostic policies. - TailAgnostic = true; - MaskAgnostic = true; - } - + auto [TailAgnostic, MaskAgnostic] = getPolicyFlags(MI, MRI); RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); Index: llvm/test/CodeGen/RISCV/fold-vector-cmp.ll =================================================================== --- llvm/test/CodeGen/RISCV/fold-vector-cmp.ll +++ llvm/test/CodeGen/RISCV/fold-vector-cmp.ll @@ -13,12 +13,10 @@ ; CHECK-V-LABEL: test: ; CHECK-V: # %bb.0: ; CHECK-V-NEXT: lui a1, 524288 -; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, tu, ma ; CHECK-V-NEXT: vmv.v.x v8, a1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addiw a0, a1, 2 -; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-V-NEXT: vmslt.vx v0, v8, a0 ; CHECK-V-NEXT: vmv.v.i v8, 0 ; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0 Index: llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -507,7 +507,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 ; CHECK-V-NEXT: addiw a1, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v10, a1 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -658,7 +657,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -826,7 +824,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -1495,7 +1492,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 8 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 @@ -1771,7 +1767,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vminu.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -2083,7 +2078,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -3921,7 +3915,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 ; CHECK-V-NEXT: addiw a1, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v10, a1 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -4070,7 +4063,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vminu.vx v10, v10, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -4237,7 +4229,6 @@ ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 ; CHECK-V-NEXT: srli a0, a0, 32 -; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v10, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -4894,7 +4885,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 8 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: lui a0, 1048568 ; CHECK-V-NEXT: vmax.vx v10, v8, a0 @@ -5166,7 +5156,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vminu.vx v10, v8, a0 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 @@ -5477,7 +5466,6 @@ ; CHECK-V-NEXT: vslideup.vi v8, v10, 7 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addiw a0, a0, -1 -; CHECK-V-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-V-NEXT: vmin.vx v8, v8, a0 ; CHECK-V-NEXT: vmax.vx v10, v8, zero ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma Index: llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv1f16( %va, %m, i32 %evl) @@ -32,14 +29,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -55,16 +51,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv2f16( %va, %m, i32 %evl) @@ -76,14 +69,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -99,16 +91,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv4f16( %va, %m, i32 %evl) @@ -120,14 +109,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -141,20 +129,17 @@ define @vp_ceil_vv_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8f16( %va, %m, i32 %evl) @@ -166,14 +151,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -187,20 +171,17 @@ define @vp_ceil_vv_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv16f16( %va, %m, i32 %evl) @@ -212,14 +193,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -233,20 +213,17 @@ define @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv32f16( %va, %m, i32 %evl) @@ -258,14 +235,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -281,16 +257,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv1f32( %va, %m, i32 %evl) @@ -302,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI13_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -325,16 +297,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv2f32( %va, %m, i32 %evl) @@ -346,14 +315,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -367,20 +335,17 @@ define @vp_ceil_vv_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv4f32( %va, %m, i32 %evl) @@ -392,14 +357,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -413,20 +377,17 @@ define @vp_ceil_vv_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8f32( %va, %m, i32 %evl) @@ -438,14 +399,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -459,20 +419,17 @@ define @vp_ceil_vv_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv16f32( %va, %m, i32 %evl) @@ -484,14 +441,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -507,16 +463,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv1f64( %va, %m, i32 %evl) @@ -528,14 +481,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -549,20 +501,17 @@ define @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv2f64( %va, %m, i32 %evl) @@ -574,14 +523,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -595,20 +543,17 @@ define @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv4f64( %va, %m, i32 %evl) @@ -620,14 +565,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -641,20 +585,17 @@ define @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv7f64( %va, %m, i32 %evl) @@ -666,14 +607,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -687,20 +627,17 @@ define @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8f64( %va, %m, i32 %evl) @@ -712,14 +649,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -734,70 +670,58 @@ define @vp_ceil_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 +; CHECK-NEXT: vslidedown.vx v2, v0, a4 ; CHECK-NEXT: bltu a0, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -817,28 +741,26 @@ ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -9,14 +9,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f16( %x) @@ -29,14 +28,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f16( %x) @@ -49,14 +47,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f16( %x) @@ -69,14 +66,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f16( %x) @@ -89,14 +85,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f16( %x) @@ -109,14 +104,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv32f16( %x) @@ -129,14 +123,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f32( %x) @@ -149,14 +142,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f32( %x) @@ -169,14 +161,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f32( %x) @@ -189,14 +180,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f32( %x) @@ -209,14 +199,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f32( %x) @@ -229,14 +218,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv1f64( %x) @@ -249,14 +237,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f64( %x) @@ -269,14 +256,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) @@ -289,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f64( %x) Index: llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -9,14 +9,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f16( %x) @@ -29,14 +28,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f16( %x) @@ -49,14 +47,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f16( %x) @@ -69,14 +66,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f16( %x) @@ -89,14 +85,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f16( %x) @@ -109,14 +104,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv32f16( %x) @@ -129,14 +123,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f32( %x) @@ -149,14 +142,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f32( %x) @@ -169,14 +161,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f32( %x) @@ -189,14 +180,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f32( %x) @@ -209,14 +199,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f32( %x) @@ -229,14 +218,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv1f64( %x) @@ -249,14 +237,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f64( %x) @@ -269,14 +256,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f64( %x) @@ -289,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f64( %x) Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll @@ -20,11 +20,9 @@ ; CHECK-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: vmv.x.s a1, v8 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -523,9 +523,8 @@ ; ; RV32ELEN32-LABEL: bitcast_i64_v4i16: ; RV32ELEN32: # %bb.0: -; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32ELEN32-NEXT: vmv.v.x v8, a1 -; RV32ELEN32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32ELEN32-NEXT: vmv.s.x v8, a0 ; RV32ELEN32-NEXT: ret ; @@ -562,9 +561,8 @@ ; ; RV32ELEN32-LABEL: bitcast_i64_v2i32: ; RV32ELEN32: # %bb.0: -; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32ELEN32-NEXT: vmv.v.x v8, a1 -; RV32ELEN32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32ELEN32-NEXT: vmv.s.x v8, a0 ; RV32ELEN32-NEXT: ret ; Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -34,16 +31,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -59,16 +53,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -82,16 +73,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -107,16 +95,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -130,16 +115,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -153,20 +135,17 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -180,18 +159,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -207,16 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -230,16 +203,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -255,16 +225,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -278,16 +245,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -301,20 +265,17 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -328,18 +289,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -353,20 +311,17 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -380,18 +335,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -407,16 +359,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -430,16 +379,13 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -453,20 +399,17 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -480,18 +423,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -505,20 +445,17 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -532,18 +469,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -557,20 +491,17 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -584,18 +515,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 @@ -609,20 +537,17 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -636,18 +561,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -661,69 +583,57 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: vslidedown.vi v2, v0, 2 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,71 +644,55 @@ define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: add sp, sp, a2 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: bltu a0, a1, .LBB27_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -702,7 +702,6 @@ ; RV32NOM-NEXT: vmv.v.i v10, 0 ; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32NOM-NEXT: vslideup.vi v10, v9, 3 -; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32NOM-NEXT: lui a0, %hi(.LCPI38_0) ; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI38_0) ; RV32NOM-NEXT: vle32.v v9, (a0) @@ -741,7 +740,6 @@ ; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV64-NEXT: vslideup.vi v10, v9, 3 -; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64-NEXT: lui a0, %hi(.LCPI38_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI38_0) ; RV64-NEXT: vle32.v v9, (a0) Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -34,16 +31,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -59,16 +53,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -82,16 +73,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -107,16 +95,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -130,16 +115,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -153,20 +135,17 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -180,18 +159,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -207,16 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -230,16 +203,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -255,16 +225,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -278,16 +245,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -301,20 +265,17 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -328,18 +289,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -353,20 +311,17 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -380,18 +335,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -407,16 +359,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -430,16 +379,13 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -453,20 +399,17 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -480,18 +423,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -505,20 +445,17 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -532,18 +469,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -557,20 +491,17 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -584,18 +515,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 @@ -609,20 +537,17 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -636,18 +561,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -661,69 +583,57 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: vslidedown.vi v2, v0, 2 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,71 +644,55 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: add sp, sp, a2 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: bltu a0, a1, .LBB27_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1966,7 +1966,7 @@ define void @trunc_v8f16(<8 x half>* %x) { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI91_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI91_0)(a1) @@ -1974,7 +1974,6 @@ ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -1988,7 +1987,7 @@ define void @trunc_v4f32(<4 x float>* %x) { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI92_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a1) @@ -1996,7 +1995,6 @@ ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -2010,7 +2008,7 @@ define void @trunc_v2f64(<2 x double>* %x) { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI93_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI93_0)(a1) @@ -2018,7 +2016,6 @@ ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret @@ -2032,7 +2029,7 @@ define void @ceil_v8f16(<8 x half>* %x) { ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI94_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1) @@ -2042,7 +2039,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -2056,7 +2052,7 @@ define void @ceil_v4f32(<4 x float>* %x) { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI95_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1) @@ -2066,7 +2062,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -2080,7 +2075,7 @@ define void @ceil_v2f64(<2 x double>* %x) { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI96_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1) @@ -2090,7 +2085,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret @@ -2104,7 +2098,7 @@ define void @floor_v8f16(<8 x half>* %x) { ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI97_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1) @@ -2114,7 +2108,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -2128,7 +2121,7 @@ define void @floor_v4f32(<4 x float>* %x) { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI98_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1) @@ -2138,7 +2131,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -2152,7 +2144,7 @@ define void @floor_v2f64(<2 x double>* %x) { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI99_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1) @@ -2162,7 +2154,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret @@ -2176,7 +2167,7 @@ define void @round_v8f16(<8 x half>* %x) { ; CHECK-LABEL: round_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI100_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI100_0)(a1) @@ -2186,7 +2177,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -2200,7 +2190,7 @@ define void @round_v4f32(<4 x float>* %x) { ; CHECK-LABEL: round_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI101_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI101_0)(a1) @@ -2210,7 +2200,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -2224,7 +2213,7 @@ define void @round_v2f64(<2 x double>* %x) { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI102_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI102_0)(a1) @@ -2234,7 +2223,6 @@ ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -279,9 +279,8 @@ ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft0, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a2, ft0, rtz -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV32-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; RV32-NEXT: vmv.v.x v8, a2 -; RV32-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; RV32-NEXT: vmv.s.x v8, a0 ; RV32-NEXT: vse8.v v8, (a1) ; RV32-NEXT: ret @@ -303,9 +302,8 @@ ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft0, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a2, ft0, rtz -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; RV64-NEXT: vmv.s.x v8, a0 ; RV64-NEXT: vse8.v v8, (a1) ; RV64-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -159,7 +159,6 @@ ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x float> %x to <8 x i1> @@ -193,7 +192,6 @@ ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptoui <8 x float> %x to <8 x i1> @@ -591,7 +589,6 @@ ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptosi <8 x double> %x to <8 x i1> @@ -647,7 +644,6 @@ ; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 ; LMULMAX1-NEXT: ret %z = fptoui <8 x double> %x to <8 x i1> Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -11,14 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.round.v1f16(<1 x half> %x) @@ -31,14 +30,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.round.v2f16(<2 x half> %x) @@ -51,14 +49,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.round.v4f16(<4 x half> %x) @@ -71,14 +68,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.round.v8f16(<8 x half> %x) @@ -91,14 +87,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.round.v16f16(<16 x half> %x) @@ -112,14 +107,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.round.v32f16(<32 x half> %x) @@ -132,14 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.round.v1f32(<1 x float> %x) @@ -152,14 +145,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.round.v2f32(<2 x float> %x) @@ -172,14 +164,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.round.v4f32(<4 x float> %x) @@ -192,14 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.round.v8f32(<8 x float> %x) @@ -212,14 +202,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.round.v16f32(<16 x float> %x) @@ -232,14 +221,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.round.v1f64(<1 x double> %x) @@ -252,14 +240,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.round.v2f64(<2 x double> %x) @@ -272,14 +259,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.round.v4f64(<4 x double> %x) @@ -292,14 +278,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.round.v8f64(<8 x double> %x) Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -11,14 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.roundeven.v1f16(<1 x half> %x) @@ -31,14 +30,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x) @@ -51,14 +49,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) @@ -71,14 +68,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %x) @@ -91,14 +87,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %x) @@ -112,14 +107,13 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %x) @@ -132,14 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.roundeven.v1f32(<1 x float> %x) @@ -152,14 +145,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x) @@ -172,14 +164,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x) @@ -192,14 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %x) @@ -212,14 +202,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %x) @@ -232,14 +221,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %x) @@ -252,14 +240,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x) @@ -272,14 +259,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %x) @@ -292,14 +278,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %x) Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -58,7 +58,6 @@ ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -133,9 +133,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: ret @@ -208,9 +207,8 @@ ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: ret @@ -226,9 +224,8 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 ; LMULMAX2-NEXT: vse32.v v10, (a0) ; LMULMAX2-NEXT: ret @@ -238,9 +235,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1-NEXT: vle32.v v8, (a1) ; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: ret @@ -421,7 +417,6 @@ ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 4 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmsne.vi v8, v9, 0 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret @@ -466,11 +461,10 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vi v9, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -45,9 +45,8 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lw a3, 16(a0) ; RV32-NEXT: addi a4, a0, 20 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV32-NEXT: vlse32.v v10, (a4), zero -; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32-NEXT: vmv.s.x v10, a3 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 @@ -157,10 +156,9 @@ define void @insertelt_v8i64_0(<8 x i64>* %x) { ; CHECK-LABEL: insertelt_v8i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret @@ -206,10 +204,9 @@ define void @insertelt_c6_v8i64_0(<8 x i64>* %x) { ; CHECK-LABEL: insertelt_c6_v8i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret @@ -257,13 +254,11 @@ define void @insertelt_c6_v8i64_0_add(<8 x i64>* %x, <8 x i64>* %y) { ; CHECK-LABEL: insertelt_c6_v8i64_0_add: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a2, 6 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v12, (a1) +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -377,9 +377,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a1, %hi(.LCPI20_0) ; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vmv.v.i v8, -1 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -191,8 +191,8 @@ ; RV32-NEXT: vslideup.vi v20, v16, 7 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vle16.v v21, (a0) +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: li a0, 164 ; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vrgatherei16.vv v16, v8, v21 @@ -207,14 +207,14 @@ ; RV64-NEXT: vmv.s.x v16, a0 ; RV64-NEXT: vmv.v.i v20, 2 ; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; RV64-NEXT: vslideup.vi v20, v16, 7 ; RV64-NEXT: lui a0, %hi(.LCPI11_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vslideup.vi v20, v16, 7 +; RV64-NEXT: vrgather.vv v16, v8, v24 ; RV64-NEXT: li a0, 164 ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV64-NEXT: vrgather.vv v16, v12, v20, v0.t ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret @@ -323,12 +323,10 @@ define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) { ; CHECK-LABEL: splat_ve4_ins_i0ve2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; CHECK-NEXT: vmv.v.i v10, 4 ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret @@ -371,15 +369,14 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i0ve4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; CHECK-NEXT: vmv.v.i v11, 2 ; CHECK-NEXT: li a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -951,11 +951,10 @@ ; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v10, v9, 6 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v12, v11 ; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v11, v9, 6 @@ -1000,7 +999,6 @@ ; CHECK-NEXT: vmv.v.i v10, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v10, v9, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret @@ -1032,16 +1030,14 @@ ; ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: lui a1, %hi(.LCPI55_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI55_0) ; RV64-NEXT: vlse64.v v8, (a1), zero ; RV64-NEXT: lui a1, %hi(.LCPI55_1) ; RV64-NEXT: ld a1, %lo(.LCPI55_1)(a1) ; RV64-NEXT: vle64.v v9, (a0) -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmulhu.vv v8, v9, v8 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vi v9, v9, 1 @@ -1192,10 +1188,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV32-NEXT: vmv.v.x v9, a2 ; RV32-NEXT: addi a1, a1, 1366 -; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32-NEXT: vmv.s.x v9, a1 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmulh.vv v9, v8, v9 @@ -1221,16 +1216,14 @@ ; ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: lui a1, %hi(.LCPI59_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI59_0) ; RV64-NEXT: vlse64.v v8, (a1), zero ; RV64-NEXT: lui a1, %hi(.LCPI59_1) ; RV64-NEXT: ld a1, %lo(.LCPI59_1)(a1) ; RV64-NEXT: vle64.v v9, (a0) -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmulh.vv v8, v9, v8 ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vrsub.vi v11, v10, 0 @@ -4610,7 +4603,6 @@ ; LMULMAX1-RV32-NEXT: vmv.v.i v13, 2 ; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 3 -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v13 ; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 @@ -4720,44 +4712,38 @@ ; ; LMULMAX1-RV64-LABEL: mulhu_v4i64: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0 ; LMULMAX1-RV64-NEXT: li a2, -1 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI156_0) +; LMULMAX1-RV64-NEXT: addi a3, a3, %lo(.LCPI156_0) +; LMULMAX1-RV64-NEXT: vlse64.v v10, (a3), zero +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI156_1) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI156_1)(a3) ; LMULMAX1-RV64-NEXT: slli a2, a2, 63 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_0) -; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_0) -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_1) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_1)(a2) -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vid.v v10 +; LMULMAX1-RV64-NEXT: vmv.s.x v9, a2 +; LMULMAX1-RV64-NEXT: vle64.v v11, (a0) +; LMULMAX1-RV64-NEXT: vmv.s.x v10, a3 +; LMULMAX1-RV64-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v9 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_2) ; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_2) -; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero +; LMULMAX1-RV64-NEXT: vlse64.v v9, (a2), zero ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_3) ; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_3)(a2) +; LMULMAX1-RV64-NEXT: vid.v v10 ; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2 -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 +; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX1-RV64-NEXT: vmv.s.x v9, a2 +; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v11, v9 ; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v10 +; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = udiv <4 x i64> %a, @@ -5077,33 +5063,31 @@ ; ; LMULMAX1-RV64-LABEL: mulhs_v4i64: ; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_0) -; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI160_0) -; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI160_1)(a1) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9 +; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI160_0) +; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI160_0) +; LMULMAX1-RV64-NEXT: vlse64.v v8, (a2), zero +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI160_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI160_1)(a2) +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vle64.v v10, (a0) +; LMULMAX1-RV64-NEXT: vmv.s.x v8, a2 +; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v8 ; LMULMAX1-RV64-NEXT: vid.v v12 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 -; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10 -; LMULMAX1-RV64-NEXT: li a1, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1 +; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9 +; LMULMAX1-RV64-NEXT: li a2, 63 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2 ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9 -; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1 -; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vmulh.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: vmacc.vv v8, v10, v13 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v12 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v10, (a2) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = sdiv <4 x i64> %a, Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -53,22 +53,18 @@ define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v2i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; ZVE32F-NEXT: vmv.v.x v8, a1 -; ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; ZVE32F-NEXT: vmv.s.x v8, a0 -; ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 ; ZVE32F-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1716,18 +1716,16 @@ ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: lw a1, 0(a0) ; RV32ZVE32F-NEXT: addi a0, a0, 8 -; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32ZVE32F-NEXT: vlse32.v v9, (a0), zero -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV32ZVE32F-NEXT: vmv.s.x v9, a1 ; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll @@ -26,9 +26,8 @@ define <8 x i32> @vpmerge_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vmseq.vv v0, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %splat = insertelement <8 x i1> poison, i1 true, i32 0 @@ -178,9 +177,8 @@ define <8 x i32> @vpmerge_vpload2(<8 x i32> %passthru, <8 x i32>* %p, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vmseq.vv v0, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %splat = insertelement <8 x i1> poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -34,16 +31,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -59,16 +53,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -82,16 +73,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -107,16 +95,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -130,16 +115,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -153,20 +135,17 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -180,18 +159,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -207,16 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -230,16 +203,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -255,16 +225,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -278,16 +245,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -301,20 +265,17 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -328,18 +289,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -353,20 +311,17 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -380,18 +335,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -407,16 +359,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -430,16 +379,13 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -453,20 +399,17 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -480,18 +423,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -505,20 +445,17 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -532,18 +469,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -557,20 +491,17 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -584,18 +515,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 @@ -609,20 +537,17 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -636,18 +561,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -661,69 +583,57 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: vslidedown.vi v2, v0, 2 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,71 +644,55 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: add sp, sp, a2 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: bltu a0, a1, .LBB27_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -34,16 +31,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -59,16 +53,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -82,16 +73,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -107,16 +95,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -130,16 +115,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -153,20 +135,17 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -180,18 +159,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -207,16 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -230,16 +203,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -255,16 +225,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -278,16 +245,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -301,20 +265,17 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -328,18 +289,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -353,20 +311,17 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -380,18 +335,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -407,16 +359,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -430,16 +379,13 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -453,20 +399,17 @@ define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -480,18 +423,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -505,20 +445,17 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -532,18 +469,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -557,20 +491,17 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -584,18 +515,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 @@ -609,20 +537,17 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -636,18 +561,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -661,69 +583,57 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: vslidedown.vi v2, v0, 2 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,71 +644,55 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: add sp, sp, a2 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: bltu a0, a1, .LBB27_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -34,16 +31,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -59,16 +53,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -82,16 +73,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -107,16 +95,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -130,16 +115,13 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -153,20 +135,17 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -180,18 +159,15 @@ ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -207,16 +183,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -230,16 +203,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -255,16 +225,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -278,16 +245,13 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -301,20 +265,17 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -328,18 +289,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -353,20 +311,17 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -380,18 +335,15 @@ ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -407,16 +359,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -430,16 +379,13 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -453,20 +399,17 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -480,18 +423,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmset.m v10 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -505,20 +445,17 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -532,18 +469,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmset.m v12 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -557,20 +491,17 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -584,18 +515,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 @@ -609,20 +537,17 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -636,18 +561,15 @@ ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -661,69 +583,57 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vslidedown.vi v25, v0, 2 +; CHECK-NEXT: vslidedown.vi v2, v0, 2 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB26_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,71 +644,55 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: vmset.m v24 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v24 -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: add sp, sp, a2 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: bltu a0, a1, .LBB27_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB27_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -2549,7 +2549,6 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; RV32-NEXT: vslideup.vi v16, v24, 16 -; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: bltu a1, a3, .LBB96_2 Index: llvm/test/CodeGen/RISCV/rvv/floor-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv1f16( %va, %m, i32 %evl) @@ -32,14 +29,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -55,16 +51,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv2f16( %va, %m, i32 %evl) @@ -76,14 +69,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -99,16 +91,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv4f16( %va, %m, i32 %evl) @@ -120,14 +109,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -141,20 +129,17 @@ define @vp_floor_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv8f16( %va, %m, i32 %evl) @@ -166,14 +151,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -187,20 +171,17 @@ define @vp_floor_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv16f16( %va, %m, i32 %evl) @@ -212,14 +193,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -233,20 +213,17 @@ define @vp_floor_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv32f16( %va, %m, i32 %evl) @@ -258,14 +235,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -281,16 +257,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv1f32( %va, %m, i32 %evl) @@ -302,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI13_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -325,16 +297,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv2f32( %va, %m, i32 %evl) @@ -346,14 +315,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -367,20 +335,17 @@ define @vp_floor_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv4f32( %va, %m, i32 %evl) @@ -392,14 +357,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -413,20 +377,17 @@ define @vp_floor_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv8f32( %va, %m, i32 %evl) @@ -438,14 +399,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -459,20 +419,17 @@ define @vp_floor_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv16f32( %va, %m, i32 %evl) @@ -484,14 +441,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -507,16 +463,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv1f64( %va, %m, i32 %evl) @@ -528,14 +481,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -549,20 +501,17 @@ define @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv2f64( %va, %m, i32 %evl) @@ -574,14 +523,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -595,20 +543,17 @@ define @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv4f64( %va, %m, i32 %evl) @@ -620,14 +565,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -641,20 +585,17 @@ define @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv7f64( %va, %m, i32 %evl) @@ -666,14 +607,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -687,20 +627,17 @@ define @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv8f64( %va, %m, i32 %evl) @@ -712,14 +649,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -734,70 +670,58 @@ define @vp_floor_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 +; CHECK-NEXT: vslidedown.vx v2, v0, a4 ; CHECK-NEXT: bltu a0, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -817,28 +741,26 @@ ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -11,14 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv1f16( %x) @@ -31,14 +30,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv2f16( %x) @@ -51,14 +49,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv4f16( %x) @@ -71,14 +68,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f16( %x) @@ -91,14 +87,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv16f16( %x) @@ -111,14 +106,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv32f16( %x) @@ -131,14 +125,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv1f32( %x) @@ -151,14 +144,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv2f32( %x) @@ -171,14 +163,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv4f32( %x) @@ -191,14 +182,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f32( %x) @@ -211,14 +201,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv16f32( %x) @@ -231,14 +220,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv1f64( %x) @@ -251,14 +239,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv2f64( %x) @@ -271,14 +258,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv4f64( %x) @@ -291,14 +277,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f64( %x) Index: llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -11,14 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv1f16( %x) @@ -31,14 +30,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv2f16( %x) @@ -51,14 +49,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv4f16( %x) @@ -71,14 +68,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f16( %x) @@ -91,14 +87,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv16f16( %x) @@ -111,14 +106,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv32f16( %x) @@ -131,14 +125,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv1f32( %x) @@ -151,14 +144,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv2f32( %x) @@ -171,14 +163,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv4f32( %x) @@ -191,14 +182,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f32( %x) @@ -211,14 +201,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv16f32( %x) @@ -231,14 +220,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv1f64( %x) @@ -251,14 +239,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv2f64( %x) @@ -271,14 +258,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv4f64( %x) @@ -291,14 +277,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f64( %x) Index: llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -9,12 +9,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv1f16( %x) @@ -27,12 +26,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv2f16( %x) @@ -45,12 +43,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv4f16( %x) @@ -63,12 +60,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f16( %x) @@ -81,12 +77,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv16f16( %x) @@ -99,12 +94,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv32f16( %x) @@ -117,12 +111,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv1f32( %x) @@ -135,12 +128,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv2f32( %x) @@ -153,12 +145,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv4f32( %x) @@ -171,12 +162,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f32( %x) @@ -189,12 +179,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv16f32( %x) @@ -207,12 +196,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv1f64( %x) @@ -225,12 +213,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv2f64( %x) @@ -243,12 +230,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv4f64( %x) @@ -261,12 +247,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f64( %x) Index: llvm/test/CodeGen/RISCV/rvv/round-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv1f16( %va, %m, i32 %evl) @@ -32,14 +29,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -55,16 +51,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv2f16( %va, %m, i32 %evl) @@ -76,14 +69,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -99,16 +91,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv4f16( %va, %m, i32 %evl) @@ -120,14 +109,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -141,20 +129,17 @@ define @vp_round_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8f16( %va, %m, i32 %evl) @@ -166,14 +151,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -187,20 +171,17 @@ define @vp_round_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv16f16( %va, %m, i32 %evl) @@ -212,14 +193,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -233,20 +213,17 @@ define @vp_round_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) @@ -258,14 +235,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -281,16 +257,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv1f32( %va, %m, i32 %evl) @@ -302,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI13_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -325,16 +297,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv2f32( %va, %m, i32 %evl) @@ -346,14 +315,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -367,20 +335,17 @@ define @vp_round_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv4f32( %va, %m, i32 %evl) @@ -392,14 +357,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -413,20 +377,17 @@ define @vp_round_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8f32( %va, %m, i32 %evl) @@ -438,14 +399,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -459,20 +419,17 @@ define @vp_round_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv16f32( %va, %m, i32 %evl) @@ -484,14 +441,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -507,16 +463,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv1f64( %va, %m, i32 %evl) @@ -528,14 +481,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -549,20 +501,17 @@ define @vp_round_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv2f64( %va, %m, i32 %evl) @@ -574,14 +523,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -595,20 +543,17 @@ define @vp_round_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv4f64( %va, %m, i32 %evl) @@ -620,14 +565,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -641,20 +585,17 @@ define @vp_round_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv7f64( %va, %m, i32 %evl) @@ -666,14 +607,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -687,20 +627,17 @@ define @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8f64( %va, %m, i32 %evl) @@ -712,14 +649,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -734,70 +670,58 @@ define @vp_round_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 +; CHECK-NEXT: vslidedown.vx v2, v0, a4 ; CHECK-NEXT: bltu a0, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -817,28 +741,26 @@ ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f16( %va, %m, i32 %evl) @@ -32,14 +29,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -55,16 +51,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f16( %va, %m, i32 %evl) @@ -76,14 +69,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -99,16 +91,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f16( %va, %m, i32 %evl) @@ -120,14 +109,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -141,20 +129,17 @@ define @vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f16( %va, %m, i32 %evl) @@ -166,14 +151,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -187,20 +171,17 @@ define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f16( %va, %m, i32 %evl) @@ -212,14 +193,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -233,20 +213,17 @@ define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) @@ -258,14 +235,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -281,16 +257,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f32( %va, %m, i32 %evl) @@ -302,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI13_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -325,16 +297,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f32( %va, %m, i32 %evl) @@ -346,14 +315,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -367,20 +335,17 @@ define @vp_roundeven_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f32( %va, %m, i32 %evl) @@ -392,14 +357,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -413,20 +377,17 @@ define @vp_roundeven_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f32( %va, %m, i32 %evl) @@ -438,14 +399,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -459,20 +419,17 @@ define @vp_roundeven_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f32( %va, %m, i32 %evl) @@ -484,14 +441,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -507,16 +463,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f64( %va, %m, i32 %evl) @@ -528,14 +481,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -549,20 +501,17 @@ define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f64( %va, %m, i32 %evl) @@ -574,14 +523,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -595,20 +543,17 @@ define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f64( %va, %m, i32 %evl) @@ -620,14 +565,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -641,20 +585,17 @@ define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv7f64( %va, %m, i32 %evl) @@ -666,14 +607,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -687,20 +627,17 @@ define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f64( %va, %m, i32 %evl) @@ -712,14 +649,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -734,70 +670,58 @@ define @vp_roundeven_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 +; CHECK-NEXT: vslidedown.vx v2, v0, a4 ; CHECK-NEXT: bltu a0, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -817,28 +741,26 @@ ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -11,16 +11,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f16( %va, %m, i32 %evl) @@ -32,14 +29,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -55,16 +51,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI2_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f16( %va, %m, i32 %evl) @@ -76,14 +69,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -99,16 +91,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f16( %va, %m, i32 %evl) @@ -120,14 +109,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -141,20 +129,17 @@ define @vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f16( %va, %m, i32 %evl) @@ -166,14 +151,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -187,20 +171,17 @@ define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f16( %va, %m, i32 %evl) @@ -212,14 +193,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -233,20 +213,17 @@ define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) @@ -258,14 +235,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -281,16 +257,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI12_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f32( %va, %m, i32 %evl) @@ -302,14 +275,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI13_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -325,16 +297,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI14_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f32( %va, %m, i32 %evl) @@ -346,14 +315,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -367,20 +335,17 @@ define @vp_roundtozero_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI16_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f32( %va, %m, i32 %evl) @@ -392,14 +357,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -413,20 +377,17 @@ define @vp_roundtozero_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f32( %va, %m, i32 %evl) @@ -438,14 +399,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -459,20 +419,17 @@ define @vp_roundtozero_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f32( %va, %m, i32 %evl) @@ -484,14 +441,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -507,16 +463,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f64( %va, %m, i32 %evl) @@ -528,14 +481,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -549,20 +501,17 @@ define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f64( %va, %m, i32 %evl) @@ -574,14 +523,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -595,20 +543,17 @@ define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f64( %va, %m, i32 %evl) @@ -620,14 +565,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -641,20 +585,17 @@ define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv7f64( %va, %m, i32 %evl) @@ -666,14 +607,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -687,20 +627,17 @@ define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f64( %va, %m, i32 %evl) @@ -712,14 +649,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 @@ -734,70 +670,58 @@ define @vp_roundtozero_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a4 +; CHECK-NEXT: vslidedown.vx v2, v0, a4 ; CHECK-NEXT: bltu a0, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t +; CHECK-NEXT: vmflt.vf v1, v16, ft0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -817,28 +741,26 @@ ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) ; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3) ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB33_4: -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, ft0 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 Index: llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -26,9 +26,8 @@ define @vpmerge_vpadd2( %passthru, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vmseq.vv v0, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 @@ -191,9 +190,8 @@ define @vpmerge_vpload2( %passthru, * %p, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vmseq.vv v0, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 @@ -857,7 +855,6 @@ ; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu ; CHECK-NEXT: vmv4r.v v20, v16 ; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmseq.vv v0, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 Index: llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -1152,9 +1152,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vsub.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1187,9 +1186,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vsadd.vv v8, v9, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1288,9 +1286,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vsmul.vv v8, v9, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1454,9 +1451,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vssub.vv v8, v9, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1489,9 +1485,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vssubu.vv v8, v9, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2478,9 +2473,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2510,9 +2504,8 @@ ; RV32-NEXT: li a1, -1 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vector-splice.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -17,12 +17,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -41,12 +39,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -67,12 +63,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -91,12 +85,10 @@ ; CHECK-NEXT: addi a0, a0, -3 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 3 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -117,12 +109,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -141,12 +131,10 @@ ; CHECK-NEXT: addi a0, a0, -7 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 7 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -166,12 +154,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -189,12 +175,10 @@ ; CHECK-NEXT: addi a0, a0, -15 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 15 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v10, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -215,12 +199,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vx v12, v12, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma ; CHECK-NEXT: vslideup.vi v12, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v12, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -239,12 +221,10 @@ ; CHECK-NEXT: addi a0, a0, -31 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v12, 31 -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma ; CHECK-NEXT: vslideup.vx v12, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v12, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -265,12 +245,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v16, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma ; CHECK-NEXT: vslideup.vi v16, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v16, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -290,12 +268,10 @@ ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v16, a1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma ; CHECK-NEXT: vslideup.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v16, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -316,12 +292,10 @@ ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v24, v24, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vi v24, v8, 1 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v24, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -341,12 +315,10 @@ ; CHECK-NEXT: li a1, 127 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v24, v24, a1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vx v24, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v24, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vmacc.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vmacc.ll +++ llvm/test/CodeGen/RISCV/rvv/vmacc.ll @@ -1568,9 +1568,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vmacc.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1604,9 +1603,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV32-NEXT: vmacc.vv v8, v10, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1641,9 +1639,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vmacc.vv v8, v12, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1677,9 +1674,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vmacc.vv v8, v12, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1714,9 +1710,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vmacc.vv v8, v16, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1750,9 +1745,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu ; RV32-NEXT: vmacc.vv v8, v16, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vmadd.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vmadd.ll +++ llvm/test/CodeGen/RISCV/rvv/vmadd.ll @@ -1568,9 +1568,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vmadd.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1604,9 +1603,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV32-NEXT: vmadd.vv v8, v10, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1641,9 +1639,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vmadd.vv v8, v12, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1677,9 +1674,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vmadd.vv v8, v12, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1714,9 +1710,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vmadd.vv v8, v16, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1750,9 +1745,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu ; RV32-NEXT: vmadd.vv v8, v16, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vnmsac.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vnmsac.ll +++ llvm/test/CodeGen/RISCV/rvv/vnmsac.ll @@ -1568,9 +1568,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vnmsac.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1604,9 +1603,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV32-NEXT: vnmsac.vv v8, v10, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1641,9 +1639,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vnmsac.vv v8, v12, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1677,9 +1674,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vnmsac.vv v8, v12, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1714,9 +1710,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vnmsac.vv v8, v16, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1750,9 +1745,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu ; RV32-NEXT: vnmsac.vv v8, v16, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vnmsub.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vnmsub.ll +++ llvm/test/CodeGen/RISCV/rvv/vnmsub.ll @@ -1568,9 +1568,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV32-NEXT: vnmsub.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1604,9 +1603,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV32-NEXT: vnmsub.vv v8, v10, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1641,9 +1639,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; RV32-NEXT: vnmsub.vv v8, v12, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1677,9 +1674,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu ; RV32-NEXT: vnmsub.vv v8, v12, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1714,9 +1710,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vnmsub.vv v8, v16, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1750,9 +1745,8 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu ; RV32-NEXT: vnmsub.vv v8, v16, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -450,12 +450,11 @@ ; CHECK-NEXT: beqz a3, .LBB8_2 ; CHECK-NEXT: .LBB8_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -519,7 +519,6 @@ ; CHECK-NEXT: $v0 = COPY [[PseudoVMSEQ_VI_MF2_]] ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 23 /* e32, mf2, tu, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: [[PseudoVLE32_V_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_MF2_MASK [[PseudoVMV_V_I_MF2_]], killed [[COPY]], $v0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: [[PseudoVCPOP_M_B1_:%[0-9]+]]:gpr = PseudoVCPOP_M_B1 [[PseudoVMSEQ_VI_MF2_]], -1, 0 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: BEQ killed [[PseudoVCPOP_M_B1_]], [[COPY2]], %bb.3 @@ -532,7 +531,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[DEF]], %bb.1, [[LWU]], %bb.2 - ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 [[PseudoVLE32_V_MF2_MASK]], [[PHI]], -1, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v0 = COPY [[PseudoVADD_VX_MF2_]] ; CHECK-NEXT: PseudoRET implicit $v0 Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -357,14 +357,11 @@ ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, ma -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; CHECK-NEXT: vfadd.vv v9, v8, v8 ; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0) @@ -502,9 +499,8 @@ ; CHECK-LABEL: vleNff: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, mu -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma ; CHECK-NEXT: vle64ff.v v8, (a0) -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; CHECK-NEXT: vadd.vx v8, v8, a2 ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -535,16 +535,14 @@ ; RV32MV-NEXT: andi a1, a1, 2047 ; RV32MV-NEXT: sh a1, 12(sp) ; RV32MV-NEXT: addi a1, sp, 8 -; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, tu, ma ; RV32MV-NEXT: vle16.v v8, (a1) ; RV32MV-NEXT: vmv.v.i v9, 10 ; RV32MV-NEXT: li a1, 9 -; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma +; RV32MV-NEXT: lui a2, %hi(.LCPI4_0) +; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_0) +; RV32MV-NEXT: vle16.v v10, (a2) ; RV32MV-NEXT: vmv.s.x v9, a1 -; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV32MV-NEXT: vle16.v v10, (a1) ; RV32MV-NEXT: vid.v v11 ; RV32MV-NEXT: vsub.vv v8, v8, v11 ; RV32MV-NEXT: vmul.vv v8, v8, v10 @@ -552,11 +550,9 @@ ; RV32MV-NEXT: vsll.vv v9, v10, v9 ; RV32MV-NEXT: vmv.v.i v10, 0 ; RV32MV-NEXT: li a1, 1 -; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma ; RV32MV-NEXT: vmv1r.v v11, v10 ; RV32MV-NEXT: vmv.s.x v11, a1 ; RV32MV-NEXT: li a1, 2047 -; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: lui a2, %hi(.LCPI4_1) ; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1) @@ -600,16 +596,14 @@ ; RV64MV-NEXT: srli a1, a1, 53 ; RV64MV-NEXT: sh a1, 10(sp) ; RV64MV-NEXT: addi a1, sp, 8 -; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, tu, ma ; RV64MV-NEXT: vle16.v v8, (a1) ; RV64MV-NEXT: vmv.v.i v9, 10 ; RV64MV-NEXT: li a1, 9 -; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma +; RV64MV-NEXT: lui a2, %hi(.LCPI4_0) +; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_0) +; RV64MV-NEXT: vle16.v v10, (a2) ; RV64MV-NEXT: vmv.s.x v9, a1 -; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV64MV-NEXT: vle16.v v10, (a1) ; RV64MV-NEXT: vid.v v11 ; RV64MV-NEXT: vsub.vv v8, v8, v11 ; RV64MV-NEXT: vmul.vv v8, v8, v10 @@ -617,11 +611,9 @@ ; RV64MV-NEXT: vsll.vv v9, v10, v9 ; RV64MV-NEXT: vmv.v.i v10, 0 ; RV64MV-NEXT: li a1, 1 -; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma ; RV64MV-NEXT: vmv1r.v v11, v10 ; RV64MV-NEXT: vmv.s.x v11, a1 ; RV64MV-NEXT: li a1, 2047 -; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: lui a2, %hi(.LCPI4_1) ; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_1)