diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3164,6 +3164,11 @@ IsVMSet(MaskSetter.getMachineOpcode()); } +static bool isImplicitDef(SDValue V) { + return V.isMachineOpcode() && + V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; +} + // Optimize masked RVV pseudo instructions with a known all-ones mask to their // corresponding "unmasked" pseudo versions. The mask we're interested in will // take the form of a V0 physical register operand, with a glued @@ -3186,22 +3191,18 @@ if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID); - bool UseTUPseudo = false; - if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { - // Some operations are their own TU. - if (I->UnmaskedTUPseudo == I->UnmaskedPseudo) { - UseTUPseudo = true; - } else { - if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & - RISCVII::TAIL_AGNOSTIC)) { - // Keep the true-masked instruction when there is no unmasked TU - // instruction - if (I->UnmaskedTUPseudo == I->MaskedPseudo) - return false; - UseTUPseudo = true; - } - } - } + // If the merge operand is implicit_def, then we can throw it away and use + // the tail undefined pseudo. + bool UseTUPseudo = !isImplicitDef(N->getOperand(0)); + + // Some unmasked pseudos are their own TU. + if (I->UnmaskedTUPseudo == I->UnmaskedPseudo) + UseTUPseudo = true; + + // Some instructions like vmflt etc. don't have TU variants because they + // are only tail agnostic. So if the TU pseudo doesn't exist, bail. + if (UseTUPseudo && I->UnmaskedTUPseudo == I->MaskedPseudo) + return false; unsigned Opc = UseTUPseudo ? I->UnmaskedTUPseudo : I->UnmaskedPseudo; const MCInstrDesc &MCID = TII->get(Opc); @@ -3238,11 +3239,6 @@ return true; } -static bool isImplicitDef(SDValue V) { - return V.isMachineOpcode() && - V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; -} - // Try to fold away VMERGE_VVM instructions. We handle these cases: // -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction // folds to a masked TU instruction. VMERGE_VVM must have have merge operand diff --git a/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll b/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll --- a/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll @@ -75,7 +75,7 @@ define @test3( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -30,12 +30,15 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -74,12 +77,15 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -118,12 +124,15 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,17 +173,22 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_ceil_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_ceil_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_ceil_vv_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -344,12 +371,15 @@ define @vp_ceil_vv_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -390,17 +420,22 @@ define @vp_ceil_vv_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_ceil_vv_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_ceil_vv_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_ceil_vv_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -572,17 +620,22 @@ define @vp_ceil_vv_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_ceil_vv_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_ceil_vv_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_ceil_vv_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -799,6 +867,14 @@ define @vp_ceil_vv_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -806,28 +882,42 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -15,9 +15,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI0_0) ; RV32-NEXT: fld fa5, %lo(.LCPI0_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -34,9 +36,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI0_0) ; RV64-NEXT: fld fa5, %lo(.LCPI0_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -58,9 +62,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI1_0) ; RV32-NEXT: fld fa5, %lo(.LCPI1_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -77,9 +83,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI1_0) ; RV64-NEXT: fld fa5, %lo(.LCPI1_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -101,9 +109,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI2_0) ; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -118,9 +128,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI2_0) ; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -140,9 +152,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI3_0) ; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -157,9 +171,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI3_0) ; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -257,13 +273,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI8_0) ; RV32-NEXT: fld fa5, %lo(.LCPI8_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -276,13 +296,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI8_0) ; RV64-NEXT: fld fa5, %lo(.LCPI8_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -300,13 +324,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI9_0) ; RV32-NEXT: fld fa5, %lo(.LCPI9_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -319,13 +347,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI9_0) ; RV64-NEXT: fld fa5, %lo(.LCPI9_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -343,13 +375,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI10_0) ; RV32-NEXT: fld fa5, %lo(.LCPI10_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -360,13 +396,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-NEXT: fld fa5, %lo(.LCPI10_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -382,13 +422,17 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -399,13 +443,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI11_0) ; RV64-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -499,10 +547,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI16_0) ; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -520,10 +570,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI16_0) ; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -546,10 +598,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI17_0) ; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -567,10 +621,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI17_0) ; RV64-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -593,10 +649,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI18_0) ; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -612,10 +670,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI18_0) ; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -636,10 +696,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI19_0) ; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -655,10 +717,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI19_0) ; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -781,15 +845,19 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI24_0) ; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -802,15 +870,19 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI24_0) ; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -828,15 +900,19 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI25_0) ; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -849,15 +925,19 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI25_0) ; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -875,15 +955,19 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI26_0) ; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -894,15 +978,19 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI26_0) ; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -918,15 +1006,19 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI27_0) ; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: vmset.m v12 +; RV32-NEXT: vfabs.v v16, v8 +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV32-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -937,15 +1029,19 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI27_0) ; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: vmset.m v12 +; RV64-NEXT: vfabs.v v16, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmflt.vf v12, v16, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vfcvt.x.f.v v16, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t +; RV64-NEXT: vfcvt.f.x.v v16, v16, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -8,18 +8,23 @@ ; CHECK-LABEL: ceil_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -30,18 +35,23 @@ ; CHECK-LABEL: ceil_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -52,18 +62,23 @@ ; CHECK-LABEL: ceil_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -74,18 +89,23 @@ ; CHECK-LABEL: ceil_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -96,18 +116,23 @@ ; CHECK-LABEL: ceil_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -118,18 +143,23 @@ ; CHECK-LABEL: ceil_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -141,17 +171,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -163,17 +198,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -185,17 +225,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -207,17 +252,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -229,17 +279,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -250,18 +305,23 @@ ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -272,18 +332,23 @@ ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -294,18 +359,23 @@ ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -316,18 +386,23 @@ ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -9,10 +9,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,10 +31,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -49,10 +53,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -69,15 +75,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f16( %x) ret %a @@ -89,15 +99,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f16( %x) ret %a @@ -109,15 +123,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv32f16( %x) ret %a @@ -127,12 +145,14 @@ define @ceil_nxv1f32( %x) { ; CHECK-LABEL: ceil_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -147,12 +167,14 @@ define @ceil_nxv2f32( %x) { ; CHECK-LABEL: ceil_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -167,17 +189,21 @@ define @ceil_nxv4f32( %x) { ; CHECK-LABEL: ceil_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f32( %x) ret %a @@ -187,17 +213,21 @@ define @ceil_nxv8f32( %x) { ; CHECK-LABEL: ceil_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f32( %x) ret %a @@ -207,17 +237,21 @@ define @ceil_nxv16f32( %x) { ; CHECK-LABEL: ceil_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv16f32( %x) ret %a @@ -229,10 +263,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,15 +285,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv2f64( %x) ret %a @@ -269,15 +309,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv4f64( %x) ret %a @@ -289,15 +333,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.ceil.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -8,18 +8,23 @@ ; CHECK-LABEL: floor_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -30,18 +35,23 @@ ; CHECK-LABEL: floor_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -52,18 +62,23 @@ ; CHECK-LABEL: floor_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -74,18 +89,23 @@ ; CHECK-LABEL: floor_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -96,18 +116,23 @@ ; CHECK-LABEL: floor_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -118,18 +143,23 @@ ; CHECK-LABEL: floor_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -141,17 +171,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -163,17 +198,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -185,17 +225,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -207,17 +252,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -229,17 +279,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -250,18 +305,23 @@ ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -272,18 +332,23 @@ ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -294,18 +359,23 @@ ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -316,18 +386,23 @@ ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -9,10 +9,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,10 +31,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -49,10 +53,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -69,15 +75,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f16( %x) ret %a @@ -89,15 +99,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f16( %x) ret %a @@ -109,15 +123,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv32f16( %x) ret %a @@ -127,12 +145,14 @@ define @floor_nxv1f32( %x) { ; CHECK-LABEL: floor_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -147,12 +167,14 @@ define @floor_nxv2f32( %x) { ; CHECK-LABEL: floor_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -167,17 +189,21 @@ define @floor_nxv4f32( %x) { ; CHECK-LABEL: floor_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f32( %x) ret %a @@ -187,17 +213,21 @@ define @floor_nxv8f32( %x) { ; CHECK-LABEL: floor_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f32( %x) ret %a @@ -207,17 +237,21 @@ define @floor_nxv16f32( %x) { ; CHECK-LABEL: floor_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv16f32( %x) ret %a @@ -229,10 +263,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,15 +285,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv2f64( %x) ret %a @@ -269,15 +309,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv4f64( %x) ret %a @@ -289,15 +333,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.floor.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,12 +269,15 @@ define <4 x float> @vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -298,17 +318,22 @@ define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -668,36 +721,58 @@ define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -8,18 +8,23 @@ ; CHECK-LABEL: ceil_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.ceil.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -30,18 +35,23 @@ ; CHECK-LABEL: ceil_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.ceil.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -52,18 +62,23 @@ ; CHECK-LABEL: ceil_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.ceil.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -74,18 +89,23 @@ ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.ceil.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -96,18 +116,23 @@ ; CHECK-LABEL: ceil_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.ceil.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -119,18 +144,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.ceil.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -142,17 +172,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -164,17 +199,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.ceil.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -186,17 +226,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -208,17 +253,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -230,17 +280,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -251,18 +306,23 @@ ; CHECK-LABEL: ceil_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -273,18 +333,23 @@ ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -295,18 +360,23 @@ ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -317,18 +387,23 @@ ; CHECK-LABEL: ceil_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -8,18 +8,23 @@ ; CHECK-LABEL: floor_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.floor.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -30,18 +35,23 @@ ; CHECK-LABEL: floor_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.floor.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -52,18 +62,23 @@ ; CHECK-LABEL: floor_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.floor.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -74,18 +89,23 @@ ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.floor.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -96,18 +116,23 @@ ; CHECK-LABEL: floor_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.floor.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -119,18 +144,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.floor.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -142,17 +172,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -164,17 +199,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.floor.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -186,17 +226,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -208,17 +253,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -230,17 +280,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -251,18 +306,23 @@ ; CHECK-LABEL: floor_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -273,18 +333,23 @@ ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -295,18 +360,23 @@ ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -317,18 +387,23 @@ ; CHECK-LABEL: floor_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,12 +269,15 @@ define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -298,17 +318,22 @@ define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -668,36 +721,58 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: nearbyint_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x half> %r @@ -33,18 +38,23 @@ ; CHECK-LABEL: nearbyint_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x half> %r @@ -56,18 +66,23 @@ ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %r @@ -79,18 +94,23 @@ ; CHECK-LABEL: nearbyint_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x half> %r @@ -103,18 +123,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <32 x half> %r @@ -127,17 +152,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x float> %r @@ -150,17 +180,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x float> %r @@ -173,17 +208,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x float> %r @@ -196,17 +236,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x float> %r @@ -218,18 +263,23 @@ ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r @@ -241,18 +291,23 @@ ; CHECK-LABEL: nearbyint_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r @@ -264,18 +319,23 @@ ; CHECK-LABEL: nearbyint_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -2417,12 +2417,14 @@ define void @trunc_v8f16(ptr %x) { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI115_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI115_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -2443,9 +2445,11 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI116_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -2463,12 +2467,14 @@ define void @trunc_v4f32(ptr %x) { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -2485,12 +2491,14 @@ define void @trunc_v2f64(ptr %x) { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI118_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI118_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -2507,13 +2515,15 @@ define void @ceil_v8f16(ptr %x) { ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI119_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI119_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2535,10 +2545,12 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI120_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2557,13 +2569,15 @@ define void @ceil_v4f32(ptr %x) { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2581,13 +2595,15 @@ define void @ceil_v2f64(ptr %x) { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI122_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI122_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2605,13 +2621,15 @@ define void @floor_v8f16(ptr %x) { ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI123_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI123_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2633,10 +2651,12 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI124_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2655,13 +2675,15 @@ define void @floor_v4f32(ptr %x) { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2679,13 +2701,15 @@ define void @floor_v2f64(ptr %x) { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI126_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI126_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2703,13 +2727,15 @@ define void @round_v8f16(ptr %x) { ; CHECK-LABEL: round_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI127_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI127_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2731,10 +2757,12 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI128_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2753,13 +2781,15 @@ define void @round_v4f32(ptr %x) { ; CHECK-LABEL: round_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2777,13 +2807,15 @@ define void @round_v2f64(ptr %x) { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI130_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI130_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2801,12 +2833,14 @@ define void @rint_v8f16(ptr %x) { ; CHECK-LABEL: rint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI131_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI131_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -2823,12 +2857,14 @@ define void @rint_v4f32(ptr %x) { ; CHECK-LABEL: rint_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -2845,12 +2881,14 @@ define void @rint_v2f64(ptr %x) { ; CHECK-LABEL: rint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI133_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI133_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -2867,13 +2905,15 @@ define void @nearbyint_v8f16(ptr %x) { ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI134_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI134_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 @@ -2891,13 +2931,15 @@ define void @nearbyint_v4f32(ptr %x) { ; CHECK-LABEL: nearbyint_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 @@ -2915,13 +2957,15 @@ define void @nearbyint_v2f64(ptr %x) { ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI136_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI136_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: round_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.round.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -32,18 +37,23 @@ ; CHECK-LABEL: round_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.round.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -54,18 +64,23 @@ ; CHECK-LABEL: round_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.round.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -76,18 +91,23 @@ ; CHECK-LABEL: round_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.round.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -98,18 +118,23 @@ ; CHECK-LABEL: round_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.round.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -121,18 +146,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.round.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -144,17 +174,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -166,17 +201,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.round.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -188,17 +228,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -210,17 +255,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.round.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -232,17 +282,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -253,18 +308,23 @@ ; CHECK-LABEL: round_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -275,18 +335,23 @@ ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -297,18 +362,23 @@ ; CHECK-LABEL: round_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -319,18 +389,23 @@ ; CHECK-LABEL: round_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.round.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -11,10 +11,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,10 +33,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,10 +55,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -71,10 +77,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -91,15 +99,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.round.v16f16(<16 x half> %x) ret <16 x half> %a @@ -112,15 +124,19 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.round.v32f16(<32 x half> %x) ret <32 x half> %a @@ -130,12 +146,14 @@ define <1 x float> @round_v1f32(<1 x float> %x) { ; CHECK-LABEL: round_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -150,12 +168,14 @@ define <2 x float> @round_v2f32(<2 x float> %x) { ; CHECK-LABEL: round_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -170,12 +190,14 @@ define <4 x float> @round_v4f32(<4 x float> %x) { ; CHECK-LABEL: round_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -190,17 +212,21 @@ define <8 x float> @round_v8f32(<8 x float> %x) { ; CHECK-LABEL: round_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.round.v8f32(<8 x float> %x) ret <8 x float> %a @@ -210,17 +236,21 @@ define <16 x float> @round_v16f32(<16 x float> %x) { ; CHECK-LABEL: round_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.round.v16f32(<16 x float> %x) ret <16 x float> %a @@ -232,10 +262,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,10 +284,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,15 +306,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.round.v4f64(<4 x double> %x) ret <4 x double> %a @@ -292,15 +330,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.round.v8f64(<8 x double> %x) ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: roundeven_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.roundeven.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -32,18 +37,23 @@ ; CHECK-LABEL: roundeven_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.roundeven.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -54,18 +64,23 @@ ; CHECK-LABEL: roundeven_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.roundeven.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -76,18 +91,23 @@ ; CHECK-LABEL: roundeven_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -98,18 +118,23 @@ ; CHECK-LABEL: roundeven_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.roundeven.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -121,18 +146,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.roundeven.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -144,17 +174,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.roundeven.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -166,17 +201,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.roundeven.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -188,17 +228,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -210,17 +255,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -232,17 +282,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -253,18 +308,23 @@ ; CHECK-LABEL: roundeven_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -275,18 +335,23 @@ ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -297,18 +362,23 @@ ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -319,18 +389,23 @@ ; CHECK-LABEL: roundeven_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.roundeven.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -11,10 +11,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,10 +33,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,10 +55,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -71,10 +77,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -91,15 +99,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %x) ret <16 x half> %a @@ -112,15 +124,19 @@ ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %x) ret <32 x half> %a @@ -130,12 +146,14 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) { ; CHECK-LABEL: roundeven_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -150,12 +168,14 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) { ; CHECK-LABEL: roundeven_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -170,12 +190,14 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -190,17 +212,21 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %x) ret <8 x float> %a @@ -210,17 +236,21 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) { ; CHECK-LABEL: roundeven_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %x) ret <16 x float> %a @@ -232,10 +262,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,10 +284,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,15 +306,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %x) ret <4 x double> %a @@ -292,15 +330,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %x) ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -8,16 +8,21 @@ ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.trunc.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -28,16 +33,21 @@ ; CHECK-LABEL: trunc_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.trunc.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -48,16 +58,21 @@ ; CHECK-LABEL: trunc_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.trunc.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -68,16 +83,21 @@ ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -88,16 +108,21 @@ ; CHECK-LABEL: trunc_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.trunc.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -109,16 +134,21 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.trunc.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -130,15 +160,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -150,15 +185,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.trunc.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -170,15 +210,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -190,15 +235,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -210,15 +260,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -229,16 +284,21 @@ ; CHECK-LABEL: trunc_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -249,16 +309,21 @@ ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -269,16 +334,21 @@ ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -289,16 +359,21 @@ ; CHECK-LABEL: trunc_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -252,12 +269,15 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -298,17 +318,22 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -644,7 +697,9 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -652,10 +707,14 @@ ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a1 @@ -665,10 +724,13 @@ ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll @@ -230,7 +230,7 @@ define <8 x i32> @vpselect_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vpadd3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret %splat = insertelement <8 x i1> poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -30,9 +30,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -70,9 +73,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -110,9 +116,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -152,13 +161,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -190,11 +204,14 @@ define <2 x float> @vp_rint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -230,11 +247,14 @@ define <4 x float> @vp_rint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -272,15 +292,20 @@ define <8 x float> @vp_rint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -314,15 +339,20 @@ define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -356,9 +386,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -398,13 +431,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -440,13 +478,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -482,13 +525,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -524,13 +572,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -601,32 +654,54 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,12 +269,15 @@ define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -298,17 +318,22 @@ define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -668,36 +721,58 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,12 +269,15 @@ define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -298,17 +318,22 @@ define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -668,36 +721,58 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -32,10 +32,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -76,10 +79,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,10 +126,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -166,15 +175,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -208,12 +222,15 @@ define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -252,12 +269,15 @@ define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -298,17 +318,22 @@ define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -344,17 +369,22 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -390,10 +420,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -436,15 +469,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer @@ -482,15 +520,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer @@ -528,15 +571,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -574,15 +622,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer @@ -668,36 +721,58 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -13,11 +13,13 @@ define @trunc_nxv1f32_to_si8( %x) { ; RV32-LABEL: trunc_nxv1f32_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -30,11 +32,13 @@ ; ; RV64-LABEL: trunc_nxv1f32_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -52,11 +56,13 @@ define @trunc_nxv1f32_to_ui8( %x) { ; RV32-LABEL: trunc_nxv1f32_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -69,11 +75,13 @@ ; ; RV64-LABEL: trunc_nxv1f32_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -207,15 +215,19 @@ define @trunc_nxv4f32_to_si8( %x) { ; RV32-LABEL: trunc_nxv4f32_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v10 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -224,15 +236,19 @@ ; ; RV64-LABEL: trunc_nxv4f32_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v10 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -246,15 +262,19 @@ define @trunc_nxv4f32_to_ui8( %x) { ; RV32-LABEL: trunc_nxv4f32_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v10 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 -; RV32-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -263,15 +283,19 @@ ; ; RV64-LABEL: trunc_nxv4f32_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v10 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 -; RV64-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -401,12 +425,14 @@ define @ceil_nxv1f32_to_si8( %x) { ; RV32-LABEL: ceil_nxv1f32_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -420,12 +446,14 @@ ; ; RV64-LABEL: ceil_nxv1f32_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,12 +472,14 @@ define @ceil_nxv1f32_to_ui8( %x) { ; RV32-LABEL: ceil_nxv1f32_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -463,12 +493,14 @@ ; ; RV64-LABEL: ceil_nxv1f32_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -639,17 +671,21 @@ define @ceil_nxv4f32_to_si8( %x) { ; RV32-LABEL: ceil_nxv4f32_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v10 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfncvt.rtz.x.f.w v10, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -658,17 +694,21 @@ ; ; RV64-LABEL: ceil_nxv4f32_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v10 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfncvt.rtz.x.f.w v10, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -682,17 +722,21 @@ define @ceil_nxv4f32_to_ui8( %x) { ; RV32-LABEL: ceil_nxv4f32_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v10 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmflt.vf v10, v12, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 -; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 -; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV32-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma @@ -701,17 +745,21 @@ ; ; RV64-LABEL: ceil_nxv4f32_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v10 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vmflt.vf v10, v12, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 -; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 -; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t +; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; RV64-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -30,12 +30,15 @@ define @vp_floor_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -74,12 +77,15 @@ define @vp_floor_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -118,12 +124,15 @@ define @vp_floor_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,17 +173,22 @@ define @vp_floor_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_floor_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_floor_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_floor_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -344,12 +371,15 @@ define @vp_floor_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -390,17 +420,22 @@ define @vp_floor_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_floor_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_floor_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_floor_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -572,17 +620,22 @@ define @vp_floor_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_floor_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_floor_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_floor_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -799,6 +867,14 @@ define @vp_floor_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -806,28 +882,42 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: nearbyint_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -33,18 +38,23 @@ ; CHECK-LABEL: nearbyint_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -56,18 +66,23 @@ ; CHECK-LABEL: nearbyint_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -79,18 +94,23 @@ ; CHECK-LABEL: nearbyint_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -102,18 +122,23 @@ ; CHECK-LABEL: nearbyint_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -125,18 +150,23 @@ ; CHECK-LABEL: nearbyint_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv32f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -149,17 +179,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -172,17 +207,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -195,17 +235,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -218,17 +263,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -241,17 +291,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -263,18 +318,23 @@ ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -286,18 +346,23 @@ ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -309,18 +374,23 @@ ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -332,18 +402,23 @@ ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -9,10 +9,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -29,10 +31,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -49,10 +53,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -69,15 +75,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f16( %x) ret %a @@ -89,15 +99,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16f16( %x) ret %a @@ -109,15 +123,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv32f16( %x) ret %a @@ -127,12 +145,14 @@ define @nearbyint_nxv1f32( %x) { ; CHECK-LABEL: nearbyint_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -147,12 +167,14 @@ define @nearbyint_nxv2f32( %x) { ; CHECK-LABEL: nearbyint_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -167,17 +189,21 @@ define @nearbyint_nxv4f32( %x) { ; CHECK-LABEL: nearbyint_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f32( %x) ret %a @@ -187,17 +213,21 @@ define @nearbyint_nxv8f32( %x) { ; CHECK-LABEL: nearbyint_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f32( %x) ret %a @@ -207,17 +237,21 @@ define @nearbyint_nxv16f32( %x) { ; CHECK-LABEL: nearbyint_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16f32( %x) ret %a @@ -229,10 +263,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -249,15 +285,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f64( %x) ret %a @@ -269,15 +309,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f64( %x) ret %a @@ -289,15 +333,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll @@ -9,9 +9,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -27,9 +29,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -45,9 +49,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -63,13 +69,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv8f16( %x) ret %a @@ -81,13 +91,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv16f16( %x) ret %a @@ -99,13 +113,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv32f16( %x) ret %a @@ -115,11 +133,13 @@ define @rint_nxv1f32( %x) { ; CHECK-LABEL: rint_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -133,11 +153,13 @@ define @rint_nxv2f32( %x) { ; CHECK-LABEL: rint_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -151,15 +173,19 @@ define @rint_nxv4f32( %x) { ; CHECK-LABEL: rint_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv4f32( %x) ret %a @@ -169,15 +195,19 @@ define @rint_nxv8f32( %x) { ; CHECK-LABEL: rint_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv8f32( %x) ret %a @@ -187,15 +217,19 @@ define @rint_nxv16f32( %x) { ; CHECK-LABEL: rint_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv16f32( %x) ret %a @@ -207,9 +241,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -225,13 +261,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv2f64( %x) ret %a @@ -243,13 +283,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv4f64( %x) ret %a @@ -261,13 +305,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.rint.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: round_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -32,18 +37,23 @@ ; CHECK-LABEL: round_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -54,18 +64,23 @@ ; CHECK-LABEL: round_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -76,18 +91,23 @@ ; CHECK-LABEL: round_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -98,18 +118,23 @@ ; CHECK-LABEL: round_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -120,18 +145,23 @@ ; CHECK-LABEL: round_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -143,17 +173,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -165,17 +200,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -187,17 +227,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,17 +254,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -231,17 +281,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -252,18 +307,23 @@ ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -274,18 +334,23 @@ ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -296,18 +361,23 @@ ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -318,18 +388,23 @@ ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -11,10 +11,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,10 +33,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,10 +55,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -71,15 +77,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f16( %x) ret %a @@ -91,15 +101,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv16f16( %x) ret %a @@ -111,15 +125,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv32f16( %x) ret %a @@ -129,12 +147,14 @@ define @round_nxv1f32( %x) { ; CHECK-LABEL: round_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -149,12 +169,14 @@ define @round_nxv2f32( %x) { ; CHECK-LABEL: round_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -169,17 +191,21 @@ define @round_nxv4f32( %x) { ; CHECK-LABEL: round_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv4f32( %x) ret %a @@ -189,17 +215,21 @@ define @round_nxv8f32( %x) { ; CHECK-LABEL: round_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f32( %x) ret %a @@ -209,17 +239,21 @@ define @round_nxv16f32( %x) { ; CHECK-LABEL: round_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv16f32( %x) ret %a @@ -231,10 +265,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -251,15 +287,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv2f64( %x) ret %a @@ -271,15 +311,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv4f64( %x) ret %a @@ -291,15 +335,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.round.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -10,18 +10,23 @@ ; CHECK-LABEL: roundeven_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -32,18 +37,23 @@ ; CHECK-LABEL: roundeven_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -54,18 +64,23 @@ ; CHECK-LABEL: roundeven_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -76,18 +91,23 @@ ; CHECK-LABEL: roundeven_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -98,18 +118,23 @@ ; CHECK-LABEL: roundeven_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -120,18 +145,23 @@ ; CHECK-LABEL: roundeven_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -143,17 +173,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -165,17 +200,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -187,17 +227,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,17 +254,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -231,17 +281,22 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -252,18 +307,23 @@ ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -274,18 +334,23 @@ ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -296,18 +361,23 @@ ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -318,18 +388,23 @@ ; CHECK-LABEL: roundeven_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -11,10 +11,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,10 +33,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,10 +55,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -71,15 +77,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f16( %x) ret %a @@ -91,15 +101,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv16f16( %x) ret %a @@ -111,15 +125,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv32f16( %x) ret %a @@ -129,12 +147,14 @@ define @roundeven_nxv1f32( %x) { ; CHECK-LABEL: roundeven_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -149,12 +169,14 @@ define @roundeven_nxv2f32( %x) { ; CHECK-LABEL: roundeven_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -169,17 +191,21 @@ define @roundeven_nxv4f32( %x) { ; CHECK-LABEL: roundeven_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv4f32( %x) ret %a @@ -189,17 +215,21 @@ define @roundeven_nxv8f32( %x) { ; CHECK-LABEL: roundeven_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f32( %x) ret %a @@ -209,17 +239,21 @@ define @roundeven_nxv16f32( %x) { ; CHECK-LABEL: roundeven_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv16f32( %x) ret %a @@ -231,10 +265,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -251,15 +287,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv2f64( %x) ret %a @@ -271,15 +311,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv4f64( %x) ret %a @@ -291,15 +335,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.roundeven.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -8,16 +8,21 @@ ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -28,16 +33,21 @@ ; CHECK-LABEL: trunc_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -48,16 +58,21 @@ ; CHECK-LABEL: trunc_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -68,16 +83,21 @@ ; CHECK-LABEL: trunc_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -88,16 +108,21 @@ ; CHECK-LABEL: trunc_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -108,16 +133,21 @@ ; CHECK-LABEL: trunc_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -129,15 +159,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -149,15 +184,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -169,15 +209,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -189,15 +234,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,15 +259,20 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -228,16 +283,21 @@ ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmflt.vf v9, v10, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -248,16 +308,21 @@ ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -268,16 +333,21 @@ ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -288,16 +358,21 @@ ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -9,9 +9,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -27,9 +29,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -45,9 +49,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -63,13 +69,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f16( %x) ret %a @@ -81,13 +91,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv16f16( %x) ret %a @@ -99,13 +113,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv32f16( %x) ret %a @@ -115,11 +133,13 @@ define @trunc_nxv1f32( %x) { ; CHECK-LABEL: trunc_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -133,11 +153,13 @@ define @trunc_nxv2f32( %x) { ; CHECK-LABEL: trunc_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -151,15 +173,19 @@ define @trunc_nxv4f32( %x) { ; CHECK-LABEL: trunc_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv4f32( %x) ret %a @@ -169,15 +195,19 @@ define @trunc_nxv8f32( %x) { ; CHECK-LABEL: trunc_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f32( %x) ret %a @@ -187,15 +217,19 @@ define @trunc_nxv16f32( %x) { ; CHECK-LABEL: trunc_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv16f32( %x) ret %a @@ -207,9 +241,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -225,13 +261,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv2f64( %x) ret %a @@ -243,13 +283,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv4f64( %x) ret %a @@ -261,13 +305,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.trunc.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -125,9 +125,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI6_0) ; RV32-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -141,9 +143,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI6_0) ; RV64-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -162,9 +166,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: flh fa5, %lo(.LCPI7_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -178,9 +184,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: flh fa5, %lo(.LCPI7_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -315,9 +323,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI14_0) ; RV32-NEXT: flh fa5, %lo(.LCPI14_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -331,9 +341,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI14_0) ; RV64-NEXT: flh fa5, %lo(.LCPI14_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -352,9 +364,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI15_0) ; RV32-NEXT: flh fa5, %lo(.LCPI15_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -368,9 +382,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI15_0) ; RV64-NEXT: flh fa5, %lo(.LCPI15_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -541,10 +557,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI22_0) ; RV32-NEXT: flh fa5, %lo(.LCPI22_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -559,10 +577,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI22_0) ; RV64-NEXT: flh fa5, %lo(.LCPI22_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -582,10 +602,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI23_0) ; RV32-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -600,10 +622,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI23_0) ; RV64-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -775,10 +799,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI30_0) ; RV32-NEXT: flh fa5, %lo(.LCPI30_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -793,10 +819,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI30_0) ; RV64-NEXT: flh fa5, %lo(.LCPI30_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -816,10 +844,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI31_0) ; RV32-NEXT: flh fa5, %lo(.LCPI31_0)(a0) -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -834,10 +864,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI31_0) ; RV64-NEXT: flh fa5, %lo(.LCPI31_0)(a0) -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: vmflt.vf v0, v9, fa5, v0.t ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -30,12 +30,15 @@ define @vp_nearbyint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -74,12 +77,15 @@ define @vp_nearbyint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -118,12 +124,15 @@ define @vp_nearbyint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -164,17 +173,22 @@ define @vp_nearbyint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_nearbyint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_nearbyint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_nearbyint_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -344,12 +371,15 @@ define @vp_nearbyint_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -390,17 +420,22 @@ define @vp_nearbyint_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_nearbyint_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_nearbyint_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_nearbyint_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a0 @@ -572,17 +620,22 @@ define @vp_nearbyint_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_nearbyint_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_nearbyint_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_nearbyint_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -783,6 +851,8 @@ define @vp_nearbyint_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -790,10 +860,14 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: frflags a2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 @@ -803,10 +877,13 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -28,11 +28,14 @@ define @vp_rint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -68,11 +71,14 @@ define @vp_rint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -108,11 +114,14 @@ define @vp_rint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -150,15 +159,20 @@ define @vp_rint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -192,15 +206,20 @@ define @vp_rint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -234,15 +253,20 @@ define @vp_rint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -274,11 +298,14 @@ define @vp_rint_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -314,11 +341,14 @@ define @vp_rint_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -356,15 +386,20 @@ define @vp_rint_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -398,15 +433,20 @@ define @vp_rint_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -440,15 +480,20 @@ define @vp_rint_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -480,11 +525,14 @@ define @vp_rint_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -522,15 +570,20 @@ define @vp_rint_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -564,15 +617,20 @@ define @vp_rint_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -606,15 +664,20 @@ define @vp_rint_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -648,15 +711,20 @@ define @vp_rint_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -728,6 +796,14 @@ define @vp_rint_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -735,24 +811,38 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -30,12 +30,15 @@ define @vp_round_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -74,12 +77,15 @@ define @vp_round_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -118,12 +124,15 @@ define @vp_round_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,17 +173,22 @@ define @vp_round_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_round_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_round_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_round_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -344,12 +371,15 @@ define @vp_round_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -390,17 +420,22 @@ define @vp_round_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_round_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_round_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_round_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -572,17 +620,22 @@ define @vp_round_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_round_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_round_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_round_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -799,6 +867,14 @@ define @vp_round_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -806,28 +882,42 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -30,12 +30,15 @@ define @vp_roundeven_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -74,12 +77,15 @@ define @vp_roundeven_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -118,12 +124,15 @@ define @vp_roundeven_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,17 +173,22 @@ define @vp_roundeven_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_roundeven_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_roundeven_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_roundeven_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -344,12 +371,15 @@ define @vp_roundeven_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -390,17 +420,22 @@ define @vp_roundeven_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_roundeven_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_roundeven_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_roundeven_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -572,17 +620,22 @@ define @vp_roundeven_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_roundeven_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_roundeven_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_roundeven_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -799,6 +867,14 @@ define @vp_roundeven_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -806,28 +882,42 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -30,12 +30,15 @@ define @vp_roundtozero_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -74,12 +77,15 @@ define @vp_roundtozero_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -118,12 +124,15 @@ define @vp_roundtozero_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,17 +173,22 @@ define @vp_roundtozero_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -210,17 +224,22 @@ define @vp_roundtozero_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -256,17 +275,22 @@ define @vp_roundtozero_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -300,12 +324,15 @@ define @vp_roundtozero_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -344,12 +371,15 @@ define @vp_roundtozero_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -390,17 +420,22 @@ define @vp_roundtozero_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -436,17 +471,22 @@ define @vp_roundtozero_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -482,17 +522,22 @@ define @vp_roundtozero_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -526,12 +571,15 @@ define @vp_roundtozero_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -572,17 +620,22 @@ define @vp_roundtozero_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmset.m v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -618,17 +671,22 @@ define @vp_roundtozero_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmset.m v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -664,17 +722,22 @@ define @vp_roundtozero_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -710,17 +773,22 @@ define @vp_roundtozero_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmset.m v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -799,6 +867,14 @@ define @vp_roundtozero_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -806,28 +882,42 @@ ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v2, v1 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v2, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmflt.vf v1, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -472,7 +472,7 @@ define @vpselect_vpadd3( %passthru, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpselect_vpadd3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0