diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3180,26 +3180,25 @@ if (!usesAllOnesMask(N, MaskOpIdx)) return false; - // Retrieve the tail policy operand index, if any. - std::optional TailPolicyOpIdx; const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); - bool UseTUPseudo = false; - if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { + // Retrieve the tail policy operand index, if any. + std::optional TailPolicyOpIdx; + if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID); - // Some operations are their own TU. - if (I->UnmaskedTUPseudo == I->UnmaskedPseudo) { - UseTUPseudo = true; - } else { - if (!isImplicitDef(N->getOperand(0))) { - // Keep the true-masked instruction when there is no unmasked TU - // instruction - if (I->UnmaskedTUPseudo == I->MaskedPseudo) - return false; - UseTUPseudo = true; - } - } - } + + // If the merge operand is implicit_def, then we can throw it away and use + // the tail undefined pseudo. + bool UseTUPseudo = !isImplicitDef(N->getOperand(0)); + + // Some unmasked pseudos are their own TU. + if (I->UnmaskedTUPseudo == I->UnmaskedPseudo) + UseTUPseudo = true; + + // Some instructions don't have unmasked TU variants. So if this instruction + // doesn't have one, bail. + if (UseTUPseudo && I->UnmaskedTUPseudo == I->MaskedPseudo) + return false; unsigned Opc = UseTUPseudo ? I->UnmaskedTUPseudo : I->UnmaskedPseudo; const MCInstrDesc &MCID = TII->get(Opc); @@ -3224,6 +3223,14 @@ Ops.push_back(Op); } + // Add the policy operand if the masked pseudo didn't have one but the + // unmasked one does. + if (!TailPolicyOpIdx && RISCVII::hasVecPolicyOp(MCID.TSFlags)) { + SDValue Policy = CurDAG->getTargetConstant(RISCVII::MASK_AGNOSTIC, SDLoc(N), + Subtarget->getXLenVT()); + Ops.push_back(Policy); + } + // Transitively apply any node glued to our new node. const auto *Glued = N->getGluedNode(); if (auto *TGlued = Glued->getGluedNode()) diff --git a/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll b/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll --- a/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allone-masked-to-unmasked.ll @@ -102,6 +102,8 @@ define @test4(ptr %0, %1, %2, iXLen %3) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.ceil.nxv1f16(, , i32) @@ -30,8 +30,10 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_ceil_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_ceil_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_ceil_vv_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_ceil_vv_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_ceil_vv_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_ceil_vv_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_ceil_vv_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_ceil_vv_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_ceil_vv_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_ceil_vv_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_ceil_vv_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_ceil_vv_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -799,6 +831,16 @@ define @vp_ceil_vv_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -807,20 +849,34 @@ ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -828,6 +884,10 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -17,6 +17,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI0_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -36,6 +37,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI0_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -60,6 +62,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI1_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -79,6 +82,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI1_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -103,6 +107,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -120,6 +125,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -142,6 +148,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -159,6 +166,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -259,6 +267,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI8_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -278,6 +287,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI8_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -302,6 +312,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -321,6 +332,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -345,6 +357,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI10_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -362,6 +375,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI10_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -384,6 +398,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -401,6 +416,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -501,6 +517,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -522,6 +539,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -548,6 +566,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -569,6 +588,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -595,6 +615,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -614,6 +635,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -638,6 +660,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -657,6 +680,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -783,6 +807,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -804,6 +829,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -830,6 +856,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -851,6 +878,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -877,6 +905,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -896,6 +925,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -920,6 +950,7 @@ ; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -939,6 +970,7 @@ ; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -11,15 +11,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -33,15 +35,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -55,15 +59,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -77,15 +83,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -99,15 +107,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -121,15 +131,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -141,17 +153,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -163,17 +177,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -185,17 +201,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -207,17 +225,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -229,17 +249,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -253,15 +275,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -275,15 +299,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -297,15 +323,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -319,15 +347,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.ceil.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -31,6 +32,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -51,6 +53,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -71,6 +74,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -91,6 +95,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -111,6 +116,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -129,6 +135,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -149,6 +156,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -169,6 +177,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -189,6 +198,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -209,6 +219,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -231,6 +242,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -251,6 +263,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -271,6 +284,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -291,6 +305,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -11,15 +11,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -33,15 +35,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -55,15 +59,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -77,15 +83,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -99,15 +107,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -121,15 +131,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -141,17 +153,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -163,17 +177,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -185,17 +201,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -207,17 +225,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -229,17 +249,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -253,15 +275,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -275,15 +299,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -297,15 +323,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -319,15 +347,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.floor.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -31,6 +32,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -51,6 +53,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -71,6 +74,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -91,6 +95,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -111,6 +116,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -129,6 +135,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -149,6 +156,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -169,6 +177,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -189,6 +198,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -209,6 +219,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -231,6 +242,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -251,6 +263,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -271,6 +284,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -291,6 +305,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32) @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -668,7 +694,22 @@ define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -677,27 +718,63 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -11,15 +11,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.ceil.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -33,15 +35,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.ceil.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -55,15 +59,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.ceil.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -77,15 +83,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.ceil.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -99,15 +107,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.ceil.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -122,15 +132,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.ceil.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -142,17 +154,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -164,17 +178,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.ceil.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -186,17 +202,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -208,17 +226,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -230,17 +250,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -254,15 +276,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -276,15 +300,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -298,15 +324,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -320,15 +348,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -11,15 +11,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.floor.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -33,15 +35,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.floor.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -55,15 +59,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.floor.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -77,15 +83,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.floor.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -99,15 +107,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.floor.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -122,15 +132,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.floor.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -142,17 +154,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -164,17 +178,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.floor.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -186,17 +202,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -208,17 +226,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -230,17 +250,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -254,15 +276,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -276,15 +300,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -298,15 +324,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -320,15 +348,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32) @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -668,7 +694,22 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -677,27 +718,63 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x half> %r @@ -36,15 +38,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x half> %r @@ -59,15 +63,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %r @@ -82,15 +88,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x half> %r @@ -106,15 +114,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <32 x half> %r @@ -127,17 +137,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x float> %r @@ -150,17 +162,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x float> %r @@ -173,17 +187,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x float> %r @@ -196,17 +212,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x float> %r @@ -221,15 +239,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r @@ -244,15 +264,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r @@ -267,15 +289,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -2422,6 +2422,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI115_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI115_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2445,6 +2446,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2466,6 +2468,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2490,6 +2493,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI118_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI118_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2512,6 +2516,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI119_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI119_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2537,6 +2542,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2560,6 +2566,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2586,6 +2593,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI122_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI122_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2610,6 +2618,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI123_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI123_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2635,6 +2644,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2658,6 +2668,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2684,6 +2695,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI126_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI126_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2708,6 +2720,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI127_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI127_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2733,6 +2746,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2756,6 +2770,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2782,6 +2797,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI130_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI130_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2806,6 +2822,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI131_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI131_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2826,6 +2843,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2850,6 +2868,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI133_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI133_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -2872,6 +2891,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI134_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI134_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -2894,6 +2914,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -2920,6 +2941,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI136_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI136_0)(a1) ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.round.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -35,15 +37,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.round.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -57,15 +61,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.round.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -79,15 +85,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.round.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -101,15 +109,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.round.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -124,15 +134,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.round.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -144,17 +156,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -166,17 +180,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.round.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -188,17 +204,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -210,17 +228,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.round.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -232,17 +252,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -256,15 +278,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -278,15 +302,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -300,15 +326,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -322,15 +350,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.round.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -33,6 +34,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -53,6 +55,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -73,6 +76,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -93,6 +97,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -114,6 +119,7 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -132,6 +138,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -152,6 +159,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -172,6 +180,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -192,6 +201,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -212,6 +222,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -234,6 +245,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -254,6 +266,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -274,6 +287,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -294,6 +308,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.roundeven.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -35,15 +37,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.roundeven.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -57,15 +61,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.roundeven.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -79,15 +85,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -101,15 +109,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.roundeven.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -124,15 +134,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.roundeven.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -144,17 +156,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.roundeven.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -166,17 +180,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.roundeven.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -188,17 +204,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -210,17 +228,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -232,17 +252,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -256,15 +278,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -278,15 +302,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -300,15 +326,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -322,15 +350,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.roundeven.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -33,6 +34,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -53,6 +55,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -73,6 +76,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -93,6 +97,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -114,6 +119,7 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -132,6 +138,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -152,6 +159,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -172,6 +180,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -192,6 +201,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -212,6 +222,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -234,6 +245,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -254,6 +266,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -274,6 +287,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -294,6 +308,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -11,13 +11,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x half> @llvm.experimental.constrained.trunc.v1f16(<1 x half> %x, metadata !"fpexcept.strict") ret <1 x half> %a @@ -31,13 +33,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x half> @llvm.experimental.constrained.trunc.v2f16(<2 x half> %x, metadata !"fpexcept.strict") ret <2 x half> %a @@ -51,13 +55,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x half> @llvm.experimental.constrained.trunc.v4f16(<4 x half> %x, metadata !"fpexcept.strict") ret <4 x half> %a @@ -71,13 +77,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x half> %x, metadata !"fpexcept.strict") ret <8 x half> %a @@ -91,13 +99,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x half> @llvm.experimental.constrained.trunc.v16f16(<16 x half> %x, metadata !"fpexcept.strict") ret <16 x half> %a @@ -112,13 +122,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <32 x half> @llvm.experimental.constrained.trunc.v32f16(<32 x half> %x, metadata !"fpexcept.strict") ret <32 x half> %a @@ -130,15 +142,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float> %x, metadata !"fpexcept.strict") ret <1 x float> %a @@ -150,15 +164,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x float> @llvm.experimental.constrained.trunc.v2f32(<2 x float> %x, metadata !"fpexcept.strict") ret <2 x float> %a @@ -170,15 +186,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %x, metadata !"fpexcept.strict") ret <4 x float> %a @@ -190,15 +208,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float> %x, metadata !"fpexcept.strict") ret <8 x float> %a @@ -210,15 +230,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %x, metadata !"fpexcept.strict") ret <16 x float> %a @@ -232,13 +254,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double> %x, metadata !"fpexcept.strict") ret <1 x double> %a @@ -252,13 +276,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %x, metadata !"fpexcept.strict") ret <2 x double> %a @@ -272,13 +298,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double> %x, metadata !"fpexcept.strict") ret <4 x double> %a @@ -292,13 +320,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %x, metadata !"fpexcept.strict") ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -644,7 +670,9 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -654,6 +682,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -667,8 +696,9 @@ ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmflt.vf v1, v24, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32) @@ -30,6 +30,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -70,6 +72,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -110,6 +114,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -152,6 +158,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -190,6 +198,8 @@ define <2 x float> @vp_rint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -230,6 +240,8 @@ define <4 x float> @vp_rint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -272,6 +284,8 @@ define <8 x float> @vp_rint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -314,6 +328,8 @@ define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -356,6 +372,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -398,6 +416,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -440,6 +460,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -482,6 +504,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -524,6 +548,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -601,7 +627,22 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -610,23 +651,59 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32) @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -668,7 +694,22 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -677,27 +718,63 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32) @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -668,7 +694,22 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -677,27 +718,63 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32) @@ -32,6 +32,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,6 +78,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -120,6 +124,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -166,6 +172,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -208,6 +216,8 @@ define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -252,6 +262,8 @@ define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -298,6 +310,8 @@ define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI17_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -436,6 +454,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI19_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -482,6 +502,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI21_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -528,6 +550,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -574,6 +598,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -668,7 +694,22 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vmset.m v8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: @@ -677,27 +718,63 @@ ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v0, v16 +; CHECK-NEXT: vmflt.vf v8, v0, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -15,6 +15,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 @@ -32,6 +33,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 @@ -54,6 +56,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 @@ -71,6 +74,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 @@ -209,6 +213,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v10, fa5 @@ -226,6 +231,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v10, fa5 @@ -248,6 +254,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v10, fa5 @@ -265,6 +272,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v10, fa5 @@ -403,6 +411,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 @@ -422,6 +431,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 @@ -446,6 +456,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 @@ -465,6 +476,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 @@ -641,6 +653,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v10, fa5 @@ -660,6 +673,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v10, fa5 @@ -684,6 +698,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vfabs.v v10, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 ; RV32-NEXT: vmflt.vf v0, v10, fa5 @@ -703,6 +718,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vfabs.v v10, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 ; RV64-NEXT: vmflt.vf v0, v10, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -30,8 +30,10 @@ define @vp_floor_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_floor_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_floor_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_floor_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_floor_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_floor_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_floor_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_floor_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_floor_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_floor_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_floor_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_floor_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_floor_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_floor_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_floor_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_floor_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -799,6 +831,16 @@ define @vp_floor_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -807,20 +849,34 @@ ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -828,6 +884,10 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -36,15 +38,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -59,15 +63,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -82,15 +88,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -105,15 +113,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -128,15 +138,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv32f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -149,17 +161,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -172,17 +186,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -195,17 +211,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -218,17 +236,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -241,17 +261,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -266,15 +288,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -289,15 +313,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -312,15 +338,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -335,15 +363,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -31,6 +32,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -51,6 +53,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -71,6 +74,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -91,6 +95,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -111,6 +116,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -129,6 +135,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -149,6 +156,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -169,6 +177,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -189,6 +198,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -209,6 +219,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -231,6 +242,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -251,6 +263,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -271,6 +284,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -291,6 +305,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,6 +30,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -47,6 +49,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -65,6 +68,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -83,6 +87,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -101,6 +106,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -117,6 +123,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -135,6 +142,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -153,6 +161,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -171,6 +180,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -189,6 +199,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -209,6 +220,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -227,6 +239,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -245,6 +258,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -263,6 +277,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -35,15 +37,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -57,15 +61,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -79,15 +85,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -101,15 +109,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -123,15 +133,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -143,17 +155,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -165,17 +179,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -187,17 +203,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,17 +227,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -231,17 +251,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -255,15 +277,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -277,15 +301,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -299,15 +325,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -321,15 +349,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.round.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -33,6 +34,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -53,6 +55,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -73,6 +76,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -93,6 +97,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -113,6 +118,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -131,6 +137,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -151,6 +158,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -171,6 +179,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -191,6 +200,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -211,6 +221,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -233,6 +244,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -253,6 +265,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -273,6 +286,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -293,6 +307,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -13,15 +13,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -35,15 +37,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -57,15 +61,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -79,15 +85,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -101,15 +109,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -123,15 +133,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -143,17 +155,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -165,17 +179,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -187,17 +203,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,17 +227,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -231,17 +251,19 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -255,15 +277,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -277,15 +301,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -299,15 +325,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -321,15 +349,17 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.roundeven.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -33,6 +34,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -53,6 +55,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -73,6 +76,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -93,6 +97,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -113,6 +118,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -131,6 +137,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -151,6 +158,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -171,6 +179,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -191,6 +200,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -211,6 +221,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -233,6 +244,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -253,6 +265,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t @@ -273,6 +286,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -293,6 +307,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -11,13 +11,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f16( %x, metadata !"fpexcept.strict") ret %a @@ -31,13 +33,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f16( %x, metadata !"fpexcept.strict") ret %a @@ -51,13 +55,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f16( %x, metadata !"fpexcept.strict") ret %a @@ -71,13 +77,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f16( %x, metadata !"fpexcept.strict") ret %a @@ -91,13 +99,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv16f16( %x, metadata !"fpexcept.strict") ret %a @@ -111,13 +121,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv32f16( %x, metadata !"fpexcept.strict") ret %a @@ -129,15 +141,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f32( %x, metadata !"fpexcept.strict") ret %a @@ -149,15 +163,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f32( %x, metadata !"fpexcept.strict") ret %a @@ -169,15 +185,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f32( %x, metadata !"fpexcept.strict") ret %a @@ -189,15 +207,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f32( %x, metadata !"fpexcept.strict") ret %a @@ -209,15 +229,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vmflt.vf v16, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv16f32( %x, metadata !"fpexcept.strict") ret %a @@ -231,13 +253,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmset.m v9 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmflt.vf v9, v10, fa5 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv1f64( %x, metadata !"fpexcept.strict") ret %a @@ -251,13 +275,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmset.m v10 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmflt.vf v10, v12, fa5 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv2f64( %x, metadata !"fpexcept.strict") ret %a @@ -271,13 +297,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmset.m v12 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmflt.vf v12, v16, fa5 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv4f64( %x, metadata !"fpexcept.strict") ret %a @@ -291,13 +319,15 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmflt.vf v16, v24, fa5 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %a = call @llvm.experimental.constrained.trunc.nxv8f64( %x, metadata !"fpexcept.strict") ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,6 +30,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -47,6 +49,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -65,6 +68,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -83,6 +87,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -101,6 +106,7 @@ ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -117,6 +123,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -135,6 +142,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -153,6 +161,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -171,6 +180,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -189,6 +199,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -209,6 +220,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -227,6 +239,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -245,6 +258,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -263,6 +277,7 @@ ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -127,6 +127,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -143,6 +144,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,6 +166,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -180,6 +183,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -317,6 +321,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI14_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -333,6 +338,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI14_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -354,6 +360,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -370,6 +377,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -543,6 +551,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -561,6 +570,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -584,6 +594,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -602,6 +613,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -777,6 +789,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -795,6 +808,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -818,6 +832,7 @@ ; RV32-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 +; RV32-NEXT: vmset.m v0 ; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t @@ -836,6 +851,7 @@ ; RV64-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 +; RV64-NEXT: vmset.m v0 ; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -30,8 +30,10 @@ define @vp_nearbyint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_nearbyint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_nearbyint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_nearbyint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_nearbyint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_nearbyint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_nearbyint_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_nearbyint_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_nearbyint_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_nearbyint_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_nearbyint_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_nearbyint_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_nearbyint_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_nearbyint_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_nearbyint_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_nearbyint_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -783,6 +815,8 @@ define @vp_nearbyint_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -792,6 +826,7 @@ ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -805,8 +840,9 @@ ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vmflt.vf v1, v24, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -28,8 +28,10 @@ define @vp_rint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -68,8 +70,10 @@ define @vp_rint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -108,8 +112,10 @@ define @vp_rint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -150,8 +156,10 @@ define @vp_rint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -192,8 +200,10 @@ define @vp_rint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -234,8 +244,10 @@ define @vp_rint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -274,6 +286,8 @@ define @vp_rint_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -314,6 +328,8 @@ define @vp_rint_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -356,6 +372,8 @@ define @vp_rint_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -398,6 +416,8 @@ define @vp_rint_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -440,6 +460,8 @@ define @vp_rint_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -480,8 +502,10 @@ define @vp_rint_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -522,8 +546,10 @@ define @vp_rint_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -564,8 +590,10 @@ define @vp_rint_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -606,8 +634,10 @@ define @vp_rint_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -648,8 +678,10 @@ define @vp_rint_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -728,6 +760,14 @@ define @vp_rint_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -737,10 +777,14 @@ ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: @@ -748,11 +792,16 @@ ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -30,8 +30,10 @@ define @vp_round_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_round_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_round_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_round_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_round_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_round_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_round_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_round_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_round_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_round_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_round_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_round_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_round_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_round_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_round_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_round_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -799,6 +831,16 @@ define @vp_round_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -807,20 +849,34 @@ ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -828,6 +884,10 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -30,8 +30,10 @@ define @vp_roundeven_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_roundeven_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_roundeven_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_roundeven_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_roundeven_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_roundeven_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_roundeven_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_roundeven_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_roundeven_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_roundeven_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_roundeven_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_roundeven_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_roundeven_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_roundeven_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_roundeven_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_roundeven_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -799,6 +831,16 @@ define @vp_roundeven_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -807,20 +849,34 @@ ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -828,6 +884,10 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -30,8 +30,10 @@ define @vp_roundtozero_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,8 +76,10 @@ define @vp_roundtozero_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -118,8 +122,10 @@ define @vp_roundtozero_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -164,8 +170,10 @@ define @vp_roundtozero_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI7_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -210,8 +218,10 @@ define @vp_roundtozero_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI9_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -256,8 +266,10 @@ define @vp_roundtozero_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI11_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -300,6 +312,8 @@ define @vp_roundtozero_nxv1f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -344,6 +358,8 @@ define @vp_roundtozero_nxv2f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 @@ -390,6 +406,8 @@ define @vp_roundtozero_nxv4f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 @@ -436,6 +454,8 @@ define @vp_roundtozero_nxv8f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 @@ -482,6 +502,8 @@ define @vp_roundtozero_nxv16f32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 @@ -526,8 +548,10 @@ define @vp_roundtozero_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -572,8 +596,10 @@ define @vp_roundtozero_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI25_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -618,8 +644,10 @@ define @vp_roundtozero_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -664,8 +692,10 @@ define @vp_roundtozero_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI29_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -710,8 +740,10 @@ define @vp_roundtozero_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI31_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1) +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -799,6 +831,16 @@ define @vp_roundtozero_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -807,20 +849,34 @@ ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: vfabs.v v8, v16 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfabs.v v24, v8 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -828,6 +884,10 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer