diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -521,6 +521,8 @@
       setOperationAction(
           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
           VT, Custom);
+      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
+                         Custom);

       // Expand all extending loads to types larger than this, and truncating
       // stores from types larger than this.
@@ -575,6 +577,8 @@
       setOperationAction(
           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
           VT, Custom);
+      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
+                         Custom);

       setOperationAction(
           {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
@@ -788,6 +792,8 @@
         setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                             ISD::FP_TO_UINT},
                            VT, Custom);
+        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
+                           Custom);

         // Operations below are different for between masks and other vectors.
         if (VT.getVectorElementType() == MVT::i1) {
@@ -1863,31 +1869,93 @@
   // nan case with a compare and a select.
   SDValue Src = Op.getOperand(0);

-  EVT DstVT = Op.getValueType();
+  MVT DstVT = Op.getSimpleValueType();
   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
-  unsigned Opc;
-  if (SatVT == DstVT)
-    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
-  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
-    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
-  else
+
+  if (!DstVT.isVector()) {
+    unsigned Opc;
+    if (SatVT == DstVT)
+      Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+    else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+      Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+    else
+      return SDValue();
+    // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+    SDLoc DL(Op);
+    SDValue FpToInt = DAG.getNode(
+        Opc, DL, DstVT, Src,
+        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
+
+    if (Opc == RISCVISD::FCVT_WU_RV64)
+      FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
+
+    SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+    return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
+                           ISD::CondCode::SETUO);
+  }
+
+  // Vectors.
+
+  MVT DstEltVT = DstVT.getVectorElementType();
+  MVT SrcVT = Src.getSimpleValueType();
+  MVT SrcEltVT = SrcVT.getVectorElementType();
+  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
+  unsigned DstEltSize = DstEltVT.getSizeInBits();
+
+  // Only handle saturating to the destination type.
+  if (SatVT != DstEltVT)
     return SDValue();
-  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  // FIXME: We don't support narrowing by more than one step for now.
+  if (SrcEltSize > (2 * DstEltSize))
+    return SDValue();
+
+  MVT DstContainerVT = DstVT;
+  MVT SrcContainerVT = SrcVT;
+  if (DstVT.isFixedLengthVector()) {
+    DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
+    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
+    assert(DstContainerVT.getVectorElementCount() ==
+               SrcContainerVT.getVectorElementCount() &&
+           "Expected same element count");
+    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+  }

   SDLoc DL(Op);
-  SDValue FpToInt = DAG.getNode(
-      Opc, DL, DstVT, Src,
-      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
-  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
-  // extend.
-  if (Opc == RISCVISD::FCVT_WU_RV64)
-    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
+  SDValue Mask, VL;
+  std::tie(Mask, VL) =
+      getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);

-  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
-  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+  SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
+                              {Src, Src, DAG.getCondCode(ISD::SETNE),
+                               DAG.getUNDEF(Mask.getValueType()), Mask, VL});
+
+  // If we need to widen by more than one step, promote the FP type, then do a
+  // widening convert.
+  if (DstEltSize > (2 * SrcEltSize)) {
+    assert(SrcContainerVT.getVectorElementType() == MVT::f16 &&
+           "Unexpected VT!");
+    MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
+    Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
+  }
+
+  unsigned RVVOpc =
+      IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
+  SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
+
+  SDValue SplatZero = DAG.getNode(
+      RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
+      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+  Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
+                    Res, VL);
+
+  if (DstVT.isFixedLengthVector())
+    Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
+
+  return Res;
 }

 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -7,24 +7,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    feq.s a0, ft0, ft0
-; CHECK-NEXT:    beqz a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    fcvt.w.s a0, ft0, rtz
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    feq.s a2, ft0, ft0
-; CHECK-NEXT:    beqz a2, .LBB0_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    fcvt.w.s a2, ft0, rtz
-; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vmv.v.x v8, a2
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmfne.vv v0, v8, v8
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT:    vse32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, <2 x float>* %x
@@ -35,59 +20,15 @@
 declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)

 define void @fp2ui_v2f32_v2i32(<2 x float>* %x, <2 x i32>* %y) {
-; RV32-LABEL: fp2ui_v2f32_v2i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    feq.s a0, ft0, ft0
-; RV32-NEXT:    beqz a0, .LBB1_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32-NEXT:  .LBB1_2:
-; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
-; RV32-NEXT:    vfmv.f.s ft0, v8
-; RV32-NEXT:    feq.s a2, ft0, ft0
-; RV32-NEXT:    beqz a2, .LBB1_4
-; RV32-NEXT:  # %bb.3:
-; RV32-NEXT:    fcvt.wu.s a2, ft0, rtz
-; RV32-NEXT:  .LBB1_4:
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vmv.v.x v8, a2
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; RV32-NEXT:    vmv.s.x
v8, a0 -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: ret -; -; RV64-LABEL: fp2ui_v2f32_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB1_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.wu.s a0, ft0, rtz -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: .LBB1_2: -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB1_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.wu.s a2, ft0, rtz -; RV64-NEXT: slli a2, a2, 32 -; RV64-NEXT: srli a2, a2, 32 -; RV64-NEXT: .LBB1_4: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x float>, <2 x float>* %x %d = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %a) store <2 x i32> %d, <2 x i32>* %y @@ -97,215 +38,15 @@ define void @fp2si_v8f32_v8i32(<8 x float>* %x, <8 x i32>* %y) { ; -; RV32-LABEL: fp2si_v8f32_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: .cfi_def_cfa_offset 64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 64 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.s a0, ft0, ft0 -; RV32-NEXT: beqz a0, .LBB2_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_2: -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v10, v8, 7 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a0, ft0, ft0 -; RV32-NEXT: beqz a0, .LBB2_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_4: -; RV32-NEXT: vslidedown.vi v10, v8, 6 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: bnez a2, .LBB2_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_7 -; RV32-NEXT: .LBB2_6: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_7: -; RV32-NEXT: vslidedown.vi v10, v8, 5 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: bnez a2, .LBB2_9 -; RV32-NEXT: # %bb.8: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_10 -; RV32-NEXT: .LBB2_9: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_10: -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: bnez a2, .LBB2_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_13 -; RV32-NEXT: .LBB2_12: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_13: -; RV32-NEXT: vslidedown.vi v10, v8, 3 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: bnez a2, .LBB2_15 -; RV32-NEXT: # 
%bb.14: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_16 -; RV32-NEXT: .LBB2_15: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_16: -; RV32-NEXT: vslidedown.vi v10, v8, 2 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: bnez a2, .LBB2_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_19 -; RV32-NEXT: .LBB2_18: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_19: -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: bnez a2, .LBB2_21 -; RV32-NEXT: # %bb.20: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB2_22 -; RV32-NEXT: .LBB2_21: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB2_22: -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: addi sp, s0, -64 -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 -; RV32-NEXT: ret -; -; RV64-LABEL: fp2si_v8f32_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 64 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB2_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_2: -; RV64-NEXT: sw a0, 0(sp) -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v10, v8, 7 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB2_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_4: -; RV64-NEXT: vslidedown.vi v10, v8, 6 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: bnez a2, .LBB2_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB2_7 -; RV64-NEXT: .LBB2_6: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_7: -; RV64-NEXT: vslidedown.vi v10, v8, 5 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: bnez a2, .LBB2_9 -; RV64-NEXT: # %bb.8: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB2_10 -; RV64-NEXT: .LBB2_9: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_10: -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: bnez a2, .LBB2_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB2_13 -; RV64-NEXT: .LBB2_12: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_13: -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: bnez a2, .LBB2_15 -; RV64-NEXT: # %bb.14: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB2_16 -; RV64-NEXT: .LBB2_15: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_16: -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 12(sp) -; RV64-NEXT: bnez a2, .LBB2_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j 
.LBB2_19 -; RV64-NEXT: .LBB2_18: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_19: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 8(sp) -; RV64-NEXT: bnez a2, .LBB2_21 -; RV64-NEXT: # %bb.20: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB2_22 -; RV64-NEXT: .LBB2_21: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB2_22: -; RV64-NEXT: sw a0, 4(sp) -; RV64-NEXT: mv a0, sp -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: addi sp, s0, -64 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret +; CHECK-LABEL: fp2si_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, <8 x float>* %x %d = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %a) store <8 x i32> %d, <8 x i32>* %y @@ -315,215 +56,15 @@ define void @fp2ui_v8f32_v8i32(<8 x float>* %x, <8 x i32>* %y) { ; -; RV32-LABEL: fp2ui_v8f32_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: .cfi_def_cfa_offset 64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 64 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.s a0, ft0, ft0 -; RV32-NEXT: beqz a0, .LBB3_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_2: -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v10, v8, 7 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a0, ft0, ft0 -; RV32-NEXT: beqz a0, .LBB3_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_4: -; RV32-NEXT: vslidedown.vi v10, v8, 6 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: bnez a2, .LBB3_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_7 -; RV32-NEXT: .LBB3_6: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_7: -; RV32-NEXT: vslidedown.vi v10, v8, 5 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: bnez a2, .LBB3_9 -; RV32-NEXT: # %bb.8: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_10 -; RV32-NEXT: .LBB3_9: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_10: -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: bnez a2, .LBB3_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_13 -; RV32-NEXT: .LBB3_12: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_13: -; RV32-NEXT: vslidedown.vi v10, v8, 3 -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: bnez a2, .LBB3_15 -; RV32-NEXT: # %bb.14: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_16 -; RV32-NEXT: .LBB3_15: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_16: -; RV32-NEXT: vslidedown.vi v10, v8, 2 -; RV32-NEXT: vfmv.f.s ft0, v10 -; 
RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: bnez a2, .LBB3_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_19 -; RV32-NEXT: .LBB3_18: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_19: -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.s a2, ft0, ft0 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: bnez a2, .LBB3_21 -; RV32-NEXT: # %bb.20: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: j .LBB3_22 -; RV32-NEXT: .LBB3_21: -; RV32-NEXT: fcvt.w.s a0, ft0, rtz -; RV32-NEXT: .LBB3_22: -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: addi sp, s0, -64 -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 -; RV32-NEXT: ret -; -; RV64-LABEL: fp2ui_v8f32_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 64 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB3_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_2: -; RV64-NEXT: sw a0, 0(sp) -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v10, v8, 7 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB3_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_4: -; RV64-NEXT: vslidedown.vi v10, v8, 6 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: bnez a2, .LBB3_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_7 -; RV64-NEXT: .LBB3_6: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_7: -; RV64-NEXT: vslidedown.vi v10, v8, 5 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: bnez a2, .LBB3_9 -; RV64-NEXT: # %bb.8: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_10 -; RV64-NEXT: .LBB3_9: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_10: -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: bnez a2, .LBB3_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_13 -; RV64-NEXT: .LBB3_12: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_13: -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: bnez a2, .LBB3_15 -; RV64-NEXT: # %bb.14: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_16 -; RV64-NEXT: .LBB3_15: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_16: -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 12(sp) -; RV64-NEXT: bnez a2, .LBB3_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_19 -; RV64-NEXT: .LBB3_18: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_19: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sw a0, 
8(sp) -; RV64-NEXT: bnez a2, .LBB3_21 -; RV64-NEXT: # %bb.20: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB3_22 -; RV64-NEXT: .LBB3_21: -; RV64-NEXT: fcvt.w.s a0, ft0, rtz -; RV64-NEXT: .LBB3_22: -; RV64-NEXT: sw a0, 4(sp) -; RV64-NEXT: mv a0, sp -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: addi sp, s0, -64 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, <8 x float>* %x %d = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %a) store <8 x i32> %d, <8 x i32>* %y @@ -532,168 +73,16 @@ declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float>) define void @fp2si_v2f32_v2i64(<2 x float>* %x, <2 x i64>* %y) { -; RV32-LABEL: fp2si_v2f32_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 64(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 56(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 48(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset fs0, -32 -; RV32-NEXT: .cfi_offset fs1, -40 -; RV32-NEXT: .cfi_offset fs2, -48 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, %hi(.LCPI4_0) -; RV32-NEXT: flw fs1, %lo(.LCPI4_0)(a0) -; RV32-NEXT: mv s0, a1 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bnez s2, .LBB4_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB4_2: -; RV32-NEXT: lui a2, %hi(.LCPI4_1) -; RV32-NEXT: flw fs2, %lo(.LCPI4_1)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: addi s1, a3, -1 -; RV32-NEXT: mv a3, s1 -; RV32-NEXT: beqz a2, .LBB4_18 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB4_19 -; RV32-NEXT: .LBB4_4: -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: bnez s2, .LBB4_6 -; RV32-NEXT: .LBB4_5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB4_6: -; RV32-NEXT: li s2, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB4_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: .LBB4_8: -; RV32-NEXT: bnez a1, .LBB4_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB4_10: -; RV32-NEXT: sw a3, 16(sp) -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s3, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; 
RV32-NEXT: beqz s3, .LBB4_20 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: beqz a2, .LBB4_21 -; RV32-NEXT: .LBB4_12: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB4_22 -; RV32-NEXT: .LBB4_13: -; RV32-NEXT: sw s1, 28(sp) -; RV32-NEXT: beqz s3, .LBB4_23 -; RV32-NEXT: .LBB4_14: -; RV32-NEXT: beqz a2, .LBB4_24 -; RV32-NEXT: .LBB4_15: -; RV32-NEXT: bnez a1, .LBB4_17 -; RV32-NEXT: .LBB4_16: -; RV32-NEXT: li s2, 0 -; RV32-NEXT: .LBB4_17: -; RV32-NEXT: sw s2, 24(sp) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vse64.v v8, (s0) -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 64(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 56(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 48(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; RV32-NEXT: .LBB4_18: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB4_4 -; RV32-NEXT: .LBB4_19: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: beqz s2, .LBB4_5 -; RV32-NEXT: j .LBB4_6 -; RV32-NEXT: .LBB4_20: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: bnez a2, .LBB4_12 -; RV32-NEXT: .LBB4_21: -; RV32-NEXT: mv s1, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB4_13 -; RV32-NEXT: .LBB4_22: -; RV32-NEXT: li s1, 0 -; RV32-NEXT: sw s1, 28(sp) -; RV32-NEXT: bnez s3, .LBB4_14 -; RV32-NEXT: .LBB4_23: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: bnez a2, .LBB4_15 -; RV32-NEXT: .LBB4_24: -; RV32-NEXT: mv s2, a0 -; RV32-NEXT: beqz a1, .LBB4_16 -; RV32-NEXT: j .LBB4_17 -; -; RV64-LABEL: fp2si_v2f32_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB4_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB4_2: -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB4_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.l.s a2, ft0, rtz -; RV64-NEXT: .LBB4_4: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2si_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x float>, <2 x float>* %x %d = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %a) store <2 x i64> %d, <2 x i64>* %y @@ -702,140 +91,16 @@ declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>) define void @fp2ui_v2f32_v2i64(<2 x float>* %x, <2 x i64>* %y) { -; RV32-LABEL: fp2ui_v2f32_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: 
sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 72(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 64(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 56(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset fs0, -24 -; RV32-NEXT: .cfi_offset fs1, -32 -; RV32-NEXT: .cfi_offset fs2, -40 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: mv s0, a1 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fmv.w.x fs1, zero -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s2, .LBB5_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB5_2: -; RV32-NEXT: lui a2, %hi(.LCPI5_0) -; RV32-NEXT: flw fs2, %lo(.LCPI5_0)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li s1, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB5_13 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: beqz s2, .LBB5_14 -; RV32-NEXT: .LBB5_4: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB5_6 -; RV32-NEXT: .LBB5_5: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB5_6: -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s2, .LBB5_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB5_8: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB5_15 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: beqz s2, .LBB5_16 -; RV32-NEXT: .LBB5_10: -; RV32-NEXT: bnez a2, .LBB5_12 -; RV32-NEXT: .LBB5_11: -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: .LBB5_12: -; RV32-NEXT: sw s1, 24(sp) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vse64.v v8, (s0) -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 72(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 64(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 56(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; RV32-NEXT: .LBB5_13: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: bnez s2, .LBB5_4 -; RV32-NEXT: .LBB5_14: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB5_5 -; RV32-NEXT: j .LBB5_6 -; RV32-NEXT: .LBB5_15: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: bnez s2, .LBB5_10 -; RV32-NEXT: .LBB5_16: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: beqz a2, .LBB5_11 -; RV32-NEXT: j .LBB5_12 -; -; RV64-LABEL: fp2ui_v2f32_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: 
vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB5_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB5_2: -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB5_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.lu.s a2, ft0, rtz -; RV64-NEXT: .LBB5_4: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x float>, <2 x float>* %x %d = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %a) store <2 x i64> %d, <2 x i64>* %y @@ -845,528 +110,16 @@ define void @fp2si_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) { ; -; RV32-LABEL: fp2si_v8f32_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -192 -; RV32-NEXT: .cfi_def_cfa_offset 192 -; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 180(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 176(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 172(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 168(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 160(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 152(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 144(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 -; RV32-NEXT: .cfi_offset s3, -16 -; RV32-NEXT: .cfi_offset s4, -20 -; RV32-NEXT: .cfi_offset s5, -24 -; RV32-NEXT: .cfi_offset fs0, -32 -; RV32-NEXT: .cfi_offset fs1, -40 -; RV32-NEXT: .cfi_offset fs2, -48 -; RV32-NEXT: addi s0, sp, 192 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-NEXT: flw fs1, %lo(.LCPI6_0)(a0) -; RV32-NEXT: mv s2, a1 -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bnez s4, .LBB6_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_2: -; RV32-NEXT: lui a2, %hi(.LCPI6_1) -; RV32-NEXT: flw fs2, %lo(.LCPI6_1)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: addi s3, a3, -1 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_66 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_67 -; RV32-NEXT: .LBB6_4: -; RV32-NEXT: sw a3, 68(sp) -; RV32-NEXT: bnez s4, .LBB6_6 -; RV32-NEXT: .LBB6_5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB6_6: -; RV32-NEXT: li s4, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: .LBB6_8: -; RV32-NEXT: bnez a1, .LBB6_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_10: -; RV32-NEXT: sw 
a3, 64(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 7 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_12: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_68 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_69 -; RV32-NEXT: .LBB6_14: -; RV32-NEXT: sw a3, 124(sp) -; RV32-NEXT: beqz s5, .LBB6_70 -; RV32-NEXT: .LBB6_15: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_71 -; RV32-NEXT: .LBB6_16: -; RV32-NEXT: bnez a1, .LBB6_18 -; RV32-NEXT: .LBB6_17: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_18: -; RV32-NEXT: sw a3, 120(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 6 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_20: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_72 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_73 -; RV32-NEXT: .LBB6_22: -; RV32-NEXT: sw a3, 116(sp) -; RV32-NEXT: beqz s5, .LBB6_74 -; RV32-NEXT: .LBB6_23: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_75 -; RV32-NEXT: .LBB6_24: -; RV32-NEXT: bnez a1, .LBB6_26 -; RV32-NEXT: .LBB6_25: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_26: -; RV32-NEXT: sw a3, 112(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 5 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_28: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_76 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_77 -; RV32-NEXT: .LBB6_30: -; RV32-NEXT: sw a3, 108(sp) -; RV32-NEXT: beqz s5, .LBB6_78 -; RV32-NEXT: .LBB6_31: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_79 -; RV32-NEXT: .LBB6_32: -; RV32-NEXT: bnez a1, .LBB6_34 -; RV32-NEXT: .LBB6_33: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_34: -; RV32-NEXT: sw a3, 104(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_36 -; RV32-NEXT: # %bb.35: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_36: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_80 -; RV32-NEXT: # %bb.37: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_81 -; RV32-NEXT: .LBB6_38: -; RV32-NEXT: sw a3, 100(sp) -; RV32-NEXT: beqz s5, .LBB6_82 -; RV32-NEXT: .LBB6_39: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_83 -; RV32-NEXT: .LBB6_40: -; RV32-NEXT: bnez a1, 
.LBB6_42 -; RV32-NEXT: .LBB6_41: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_42: -; RV32-NEXT: sw a3, 96(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_44 -; RV32-NEXT: # %bb.43: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_44: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_84 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_85 -; RV32-NEXT: .LBB6_46: -; RV32-NEXT: sw a3, 92(sp) -; RV32-NEXT: beqz s5, .LBB6_86 -; RV32-NEXT: .LBB6_47: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_87 -; RV32-NEXT: .LBB6_48: -; RV32-NEXT: bnez a1, .LBB6_50 -; RV32-NEXT: .LBB6_49: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_50: -; RV32-NEXT: sw a3, 88(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: bnez s5, .LBB6_52 -; RV32-NEXT: # %bb.51: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB6_52: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: mv a3, s3 -; RV32-NEXT: beqz a2, .LBB6_88 -; RV32-NEXT: # %bb.53: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_89 -; RV32-NEXT: .LBB6_54: -; RV32-NEXT: sw a3, 84(sp) -; RV32-NEXT: beqz s5, .LBB6_90 -; RV32-NEXT: .LBB6_55: -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB6_91 -; RV32-NEXT: .LBB6_56: -; RV32-NEXT: bnez a1, .LBB6_58 -; RV32-NEXT: .LBB6_57: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB6_58: -; RV32-NEXT: sw a3, 80(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s5, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: beqz s5, .LBB6_92 -; RV32-NEXT: # %bb.59: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: beqz a2, .LBB6_93 -; RV32-NEXT: .LBB6_60: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB6_94 -; RV32-NEXT: .LBB6_61: -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: beqz s5, .LBB6_95 -; RV32-NEXT: .LBB6_62: -; RV32-NEXT: beqz a2, .LBB6_96 -; RV32-NEXT: .LBB6_63: -; RV32-NEXT: bnez a1, .LBB6_65 -; RV32-NEXT: .LBB6_64: -; RV32-NEXT: li s4, 0 -; RV32-NEXT: .LBB6_65: -; RV32-NEXT: sw s4, 72(sp) -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vse64.v v8, (s2) -; RV32-NEXT: addi sp, s0, -192 -; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 180(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 176(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 172(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 168(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 160(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 152(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 144(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 192 -; RV32-NEXT: ret -; RV32-NEXT: .LBB6_66: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, 
fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_4 -; RV32-NEXT: .LBB6_67: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 68(sp) -; RV32-NEXT: beqz s4, .LBB6_5 -; RV32-NEXT: j .LBB6_6 -; RV32-NEXT: .LBB6_68: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_14 -; RV32-NEXT: .LBB6_69: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 124(sp) -; RV32-NEXT: bnez s5, .LBB6_15 -; RV32-NEXT: .LBB6_70: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_16 -; RV32-NEXT: .LBB6_71: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_17 -; RV32-NEXT: j .LBB6_18 -; RV32-NEXT: .LBB6_72: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_22 -; RV32-NEXT: .LBB6_73: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 116(sp) -; RV32-NEXT: bnez s5, .LBB6_23 -; RV32-NEXT: .LBB6_74: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_24 -; RV32-NEXT: .LBB6_75: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_25 -; RV32-NEXT: j .LBB6_26 -; RV32-NEXT: .LBB6_76: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_30 -; RV32-NEXT: .LBB6_77: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 108(sp) -; RV32-NEXT: bnez s5, .LBB6_31 -; RV32-NEXT: .LBB6_78: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_32 -; RV32-NEXT: .LBB6_79: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_33 -; RV32-NEXT: j .LBB6_34 -; RV32-NEXT: .LBB6_80: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_38 -; RV32-NEXT: .LBB6_81: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 100(sp) -; RV32-NEXT: bnez s5, .LBB6_39 -; RV32-NEXT: .LBB6_82: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_40 -; RV32-NEXT: .LBB6_83: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_41 -; RV32-NEXT: j .LBB6_42 -; RV32-NEXT: .LBB6_84: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_46 -; RV32-NEXT: .LBB6_85: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 92(sp) -; RV32-NEXT: bnez s5, .LBB6_47 -; RV32-NEXT: .LBB6_86: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_48 -; RV32-NEXT: .LBB6_87: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_49 -; RV32-NEXT: j .LBB6_50 -; RV32-NEXT: .LBB6_88: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_54 -; RV32-NEXT: .LBB6_89: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 84(sp) -; RV32-NEXT: bnez s5, .LBB6_55 -; RV32-NEXT: .LBB6_90: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB6_56 -; RV32-NEXT: .LBB6_91: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: beqz a1, .LBB6_57 -; RV32-NEXT: j .LBB6_58 -; RV32-NEXT: .LBB6_92: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: bnez a2, .LBB6_60 -; RV32-NEXT: .LBB6_93: -; RV32-NEXT: mv s3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB6_61 -; RV32-NEXT: .LBB6_94: -; RV32-NEXT: li s3, 0 -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: bnez s5, .LBB6_62 -; RV32-NEXT: .LBB6_95: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: bnez a2, .LBB6_63 -; RV32-NEXT: .LBB6_96: -; RV32-NEXT: mv s4, a0 -; RV32-NEXT: beqz a1, .LBB6_64 -; RV32-NEXT: j .LBB6_65 -; -; RV64-LABEL: fp2si_v8f32_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -128 -; RV64-NEXT: .cfi_def_cfa_offset 128 -; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, 
-16 -; RV64-NEXT: addi s0, sp, 128 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB6_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_2: -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v10, v8, 7 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB6_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_4: -; RV64-NEXT: vslidedown.vi v10, v8, 6 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 56(sp) -; RV64-NEXT: bnez a2, .LBB6_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_7 -; RV64-NEXT: .LBB6_6: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_7: -; RV64-NEXT: vslidedown.vi v10, v8, 5 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 48(sp) -; RV64-NEXT: bnez a2, .LBB6_9 -; RV64-NEXT: # %bb.8: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_10 -; RV64-NEXT: .LBB6_9: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_10: -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 40(sp) -; RV64-NEXT: bnez a2, .LBB6_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_13 -; RV64-NEXT: .LBB6_12: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_13: -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 32(sp) -; RV64-NEXT: bnez a2, .LBB6_15 -; RV64-NEXT: # %bb.14: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_16 -; RV64-NEXT: .LBB6_15: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_16: -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 24(sp) -; RV64-NEXT: bnez a2, .LBB6_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_19 -; RV64-NEXT: .LBB6_18: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_19: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: bnez a2, .LBB6_21 -; RV64-NEXT: # %bb.20: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB6_22 -; RV64-NEXT: .LBB6_21: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz -; RV64-NEXT: .LBB6_22: -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: mv a0, sp -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: addi sp, s0, -128 -; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 128 -; RV64-NEXT: ret +; CHECK-LABEL: fp2si_v8f32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, <8 x float>* %x %d = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %a) store <8 x i64> %d, <8 x i64>* %y @@ -1376,428 +129,16 @@ define void @fp2ui_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) { ; -; RV32-LABEL: fp2ui_v8f32_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: 
addi sp, sp, -208 -; RV32-NEXT: .cfi_def_cfa_offset 208 -; RV32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 200(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 196(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 192(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 176(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 168(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 160(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 -; RV32-NEXT: .cfi_offset s3, -16 -; RV32-NEXT: .cfi_offset s4, -20 -; RV32-NEXT: .cfi_offset fs0, -32 -; RV32-NEXT: .cfi_offset fs1, -40 -; RV32-NEXT: .cfi_offset fs2, -48 -; RV32-NEXT: addi s0, sp, 208 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: mv s2, a1 -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fmv.w.x fs1, zero -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_2: -; RV32-NEXT: lui a2, %hi(.LCPI7_0) -; RV32-NEXT: flw fs2, %lo(.LCPI7_0)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li s3, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_49 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: sw a3, 68(sp) -; RV32-NEXT: beqz s4, .LBB7_50 -; RV32-NEXT: .LBB7_4: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_6 -; RV32-NEXT: .LBB7_5: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_6: -; RV32-NEXT: sw a1, 64(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 7 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_8: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_51 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: sw a3, 124(sp) -; RV32-NEXT: beqz s4, .LBB7_52 -; RV32-NEXT: .LBB7_10: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_12 -; RV32-NEXT: .LBB7_11: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_12: -; RV32-NEXT: sw a1, 120(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 6 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_14: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_53 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: sw a3, 116(sp) -; RV32-NEXT: beqz s4, .LBB7_54 -; RV32-NEXT: .LBB7_16: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_18 -; RV32-NEXT: .LBB7_17: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_18: -; RV32-NEXT: sw a1, 112(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 5 -; RV32-NEXT: vfmv.f.s 
fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_20: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_55 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: sw a3, 108(sp) -; RV32-NEXT: beqz s4, .LBB7_56 -; RV32-NEXT: .LBB7_22: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_24 -; RV32-NEXT: .LBB7_23: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_24: -; RV32-NEXT: sw a1, 104(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_26: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_57 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: sw a3, 100(sp) -; RV32-NEXT: beqz s4, .LBB7_58 -; RV32-NEXT: .LBB7_28: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_30 -; RV32-NEXT: .LBB7_29: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_30: -; RV32-NEXT: sw a1, 96(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_32: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_59 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: sw a3, 92(sp) -; RV32-NEXT: beqz s4, .LBB7_60 -; RV32-NEXT: .LBB7_34: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_36 -; RV32-NEXT: .LBB7_35: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_36: -; RV32-NEXT: sw a1, 88(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_38 -; RV32-NEXT: # %bb.37: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_38: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_61 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: sw a3, 84(sp) -; RV32-NEXT: beqz s4, .LBB7_62 -; RV32-NEXT: .LBB7_40: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB7_42 -; RV32-NEXT: .LBB7_41: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB7_42: -; RV32-NEXT: sw a1, 80(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fs0, v8 -; RV32-NEXT: fle.s s4, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s4, .LBB7_44 -; RV32-NEXT: # %bb.43: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB7_44: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB7_63 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: sw a3, 76(sp) -; RV32-NEXT: beqz s4, .LBB7_64 -; RV32-NEXT: .LBB7_46: -; RV32-NEXT: bnez a2, .LBB7_48 -; RV32-NEXT: .LBB7_47: -; RV32-NEXT: mv s3, a0 -; RV32-NEXT: 
.LBB7_48: -; RV32-NEXT: sw s3, 72(sp) -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vse64.v v8, (s2) -; RV32-NEXT: addi sp, s0, -208 -; RV32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 200(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 196(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 192(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 188(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 176(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 168(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 160(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 208 -; RV32-NEXT: ret -; RV32-NEXT: .LBB7_49: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 68(sp) -; RV32-NEXT: bnez s4, .LBB7_4 -; RV32-NEXT: .LBB7_50: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_5 -; RV32-NEXT: j .LBB7_6 -; RV32-NEXT: .LBB7_51: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 124(sp) -; RV32-NEXT: bnez s4, .LBB7_10 -; RV32-NEXT: .LBB7_52: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_11 -; RV32-NEXT: j .LBB7_12 -; RV32-NEXT: .LBB7_53: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 116(sp) -; RV32-NEXT: bnez s4, .LBB7_16 -; RV32-NEXT: .LBB7_54: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_17 -; RV32-NEXT: j .LBB7_18 -; RV32-NEXT: .LBB7_55: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 108(sp) -; RV32-NEXT: bnez s4, .LBB7_22 -; RV32-NEXT: .LBB7_56: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_23 -; RV32-NEXT: j .LBB7_24 -; RV32-NEXT: .LBB7_57: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 100(sp) -; RV32-NEXT: bnez s4, .LBB7_28 -; RV32-NEXT: .LBB7_58: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_29 -; RV32-NEXT: j .LBB7_30 -; RV32-NEXT: .LBB7_59: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 92(sp) -; RV32-NEXT: bnez s4, .LBB7_34 -; RV32-NEXT: .LBB7_60: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_35 -; RV32-NEXT: j .LBB7_36 -; RV32-NEXT: .LBB7_61: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 84(sp) -; RV32-NEXT: bnez s4, .LBB7_40 -; RV32-NEXT: .LBB7_62: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB7_41 -; RV32-NEXT: j .LBB7_42 -; RV32-NEXT: .LBB7_63: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 76(sp) -; RV32-NEXT: bnez s4, .LBB7_46 -; RV32-NEXT: .LBB7_64: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: beqz a2, .LBB7_47 -; RV32-NEXT: j .LBB7_48 -; -; RV64-LABEL: fp2ui_v8f32_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -128 -; RV64-NEXT: .cfi_def_cfa_offset 128 -; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 128 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB7_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_2: -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v10, v8, 7 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB7_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_4: -; RV64-NEXT: 
vslidedown.vi v10, v8, 6 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 56(sp) -; RV64-NEXT: bnez a2, .LBB7_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_7 -; RV64-NEXT: .LBB7_6: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_7: -; RV64-NEXT: vslidedown.vi v10, v8, 5 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 48(sp) -; RV64-NEXT: bnez a2, .LBB7_9 -; RV64-NEXT: # %bb.8: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_10 -; RV64-NEXT: .LBB7_9: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_10: -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 40(sp) -; RV64-NEXT: bnez a2, .LBB7_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_13 -; RV64-NEXT: .LBB7_12: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_13: -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 32(sp) -; RV64-NEXT: bnez a2, .LBB7_15 -; RV64-NEXT: # %bb.14: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_16 -; RV64-NEXT: .LBB7_15: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_16: -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 24(sp) -; RV64-NEXT: bnez a2, .LBB7_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_19 -; RV64-NEXT: .LBB7_18: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_19: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.s a2, ft0, ft0 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: bnez a2, .LBB7_21 -; RV64-NEXT: # %bb.20: -; RV64-NEXT: li a0, 0 -; RV64-NEXT: j .LBB7_22 -; RV64-NEXT: .LBB7_21: -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: .LBB7_22: -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: mv a0, sp -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: addi sp, s0, -128 -; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 128 -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v8f32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, <8 x float>* %x %d = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %a) store <8 x i64> %d, <8 x i64>* %y @@ -1806,170 +147,18 @@ declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float>) define void @fp2si_v2f16_v2i64(<2 x half>* %x, <2 x i64>* %y) { -; RV32-LABEL: fp2si_v2f16_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 64(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 56(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 48(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; 
RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset fs0, -32 -; RV32-NEXT: .cfi_offset fs1, -40 -; RV32-NEXT: .cfi_offset fs2, -48 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, %hi(.LCPI8_0) -; RV32-NEXT: flw fs1, %lo(.LCPI8_0)(a0) -; RV32-NEXT: mv s0, a1 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fcvt.s.h fs0, ft0 -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bnez s2, .LBB8_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: .LBB8_2: -; RV32-NEXT: lui a2, %hi(.LCPI8_1) -; RV32-NEXT: flw fs2, %lo(.LCPI8_1)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: addi s1, a3, -1 -; RV32-NEXT: mv a3, s1 -; RV32-NEXT: beqz a2, .LBB8_18 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB8_19 -; RV32-NEXT: .LBB8_4: -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: bnez s2, .LBB8_6 -; RV32-NEXT: .LBB8_5: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB8_6: -; RV32-NEXT: li s2, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: bnez a2, .LBB8_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: .LBB8_8: -; RV32-NEXT: bnez a1, .LBB8_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: .LBB8_10: -; RV32-NEXT: sw a3, 16(sp) -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fcvt.s.h fs0, ft0 -; RV32-NEXT: fle.s s3, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: beqz s3, .LBB8_20 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: beqz a2, .LBB8_21 -; RV32-NEXT: .LBB8_12: -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: beqz a1, .LBB8_22 -; RV32-NEXT: .LBB8_13: -; RV32-NEXT: sw s1, 28(sp) -; RV32-NEXT: beqz s3, .LBB8_23 -; RV32-NEXT: .LBB8_14: -; RV32-NEXT: beqz a2, .LBB8_24 -; RV32-NEXT: .LBB8_15: -; RV32-NEXT: bnez a1, .LBB8_17 -; RV32-NEXT: .LBB8_16: -; RV32-NEXT: li s2, 0 -; RV32-NEXT: .LBB8_17: -; RV32-NEXT: sw s2, 24(sp) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vse64.v v8, (s0) -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 64(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 56(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 48(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; RV32-NEXT: .LBB8_18: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB8_4 -; RV32-NEXT: .LBB8_19: -; RV32-NEXT: li a3, 0 -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: beqz s2, .LBB8_5 -; RV32-NEXT: j .LBB8_6 -; RV32-NEXT: .LBB8_20: -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: bnez a2, .LBB8_12 -; RV32-NEXT: .LBB8_21: -; RV32-NEXT: mv s1, a1 -; RV32-NEXT: feq.s a1, fs0, fs0 -; RV32-NEXT: bnez a1, .LBB8_13 -; 
RV32-NEXT: .LBB8_22: -; RV32-NEXT: li s1, 0 -; RV32-NEXT: sw s1, 28(sp) -; RV32-NEXT: bnez s3, .LBB8_14 -; RV32-NEXT: .LBB8_23: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: bnez a2, .LBB8_15 -; RV32-NEXT: .LBB8_24: -; RV32-NEXT: mv s2, a0 -; RV32-NEXT: beqz a1, .LBB8_16 -; RV32-NEXT: j .LBB8_17 -; -; RV64-LABEL: fp2si_v2f16_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.h a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB8_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.l.h a0, ft0, rtz -; RV64-NEXT: .LBB8_2: -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.h a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB8_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.l.h a2, ft0, rtz -; RV64-NEXT: .LBB8_4: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2si_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %x %d = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> %a) store <2 x i64> %d, <2 x i64>* %y @@ -1978,142 +167,18 @@ declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half>) define void @fp2ui_v2f16_v2i64(<2 x half>* %x, <2 x i64>* %y) { -; RV32-LABEL: fp2ui_v2f16_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 72(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 64(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 56(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset fs0, -24 -; RV32-NEXT: .cfi_offset fs1, -32 -; RV32-NEXT: .cfi_offset fs2, -40 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: mv s0, a1 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fcvt.s.h fs0, ft0 -; RV32-NEXT: fmv.w.x fs1, zero -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s2, .LBB9_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB9_2: -; RV32-NEXT: lui a2, %hi(.LCPI9_0) -; RV32-NEXT: flw fs2, %lo(.LCPI9_0)(a2) -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li s1, -1 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB9_13 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: beqz s2, .LBB9_14 -; RV32-NEXT: .LBB9_4: -; RV32-NEXT: li a1, -1 -; RV32-NEXT: bnez a2, .LBB9_6 -; RV32-NEXT: .LBB9_5: -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: .LBB9_6: -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: vsetivli zero, 
1, e16, mf4, ta, mu -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fcvt.s.h fs0, ft0 -; RV32-NEXT: fle.s s2, fs1, fs0 -; RV32-NEXT: fmv.s fa0, fs0 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: bnez s2, .LBB9_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: li a1, 0 -; RV32-NEXT: .LBB9_8: -; RV32-NEXT: flt.s a2, fs2, fs0 -; RV32-NEXT: li a3, -1 -; RV32-NEXT: beqz a2, .LBB9_15 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: beqz s2, .LBB9_16 -; RV32-NEXT: .LBB9_10: -; RV32-NEXT: bnez a2, .LBB9_12 -; RV32-NEXT: .LBB9_11: -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: .LBB9_12: -; RV32-NEXT: sw s1, 24(sp) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vse64.v v8, (s0) -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 72(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 64(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 56(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; RV32-NEXT: .LBB9_13: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 20(sp) -; RV32-NEXT: bnez s2, .LBB9_4 -; RV32-NEXT: .LBB9_14: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: beqz a2, .LBB9_5 -; RV32-NEXT: j .LBB9_6 -; RV32-NEXT: .LBB9_15: -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: bnez s2, .LBB9_10 -; RV32-NEXT: .LBB9_16: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: beqz a2, .LBB9_11 -; RV32-NEXT: j .LBB9_12 -; -; RV64-LABEL: fp2ui_v2f16_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.h a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB9_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.lu.h a0, ft0, rtz -; RV64-NEXT: .LBB9_2: -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.h a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB9_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.lu.h a2, ft0, rtz -; RV64-NEXT: .LBB9_4: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %x %d = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> %a) store <2 x i64> %d, <2 x i64>* %y @@ -2630,24 +695,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: feq.d a0, ft0, ft0 -; CHECK-NEXT: beqz a0, .LBB14_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fcvt.w.d a0, ft0, rtz -; CHECK-NEXT: .LBB14_2: -; CHECK-NEXT: vsetivli 
zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: feq.d a2, ft0, ft0 -; CHECK-NEXT: beqz a2, .LBB14_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fcvt.w.d a2, ft0, rtz -; CHECK-NEXT: .LBB14_4: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 +; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x @@ -2658,59 +709,16 @@ declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>) define void @fp2ui_v2f64_v2i32(<2 x double>* %x, <2 x i32>* %y) { -; RV32-LABEL: fp2ui_v2f64_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.d a0, ft0, ft0 -; RV32-NEXT: beqz a0, .LBB15_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: fcvt.wu.d a0, ft0, rtz -; RV32-NEXT: .LBB15_2: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: feq.d a2, ft0, ft0 -; RV32-NEXT: beqz a2, .LBB15_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fcvt.wu.d a2, ft0, rtz -; RV32-NEXT: .LBB15_4: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV32-NEXT: vmv.v.x v8, a2 -; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: ret -; -; RV64-LABEL: fp2ui_v2f64_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.d a0, ft0, ft0 -; RV64-NEXT: beqz a0, .LBB15_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.wu.d a0, ft0, rtz -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: .LBB15_2: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: feq.d a2, ft0, ft0 -; RV64-NEXT: beqz a2, .LBB15_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fcvt.wu.d a2, ft0, rtz -; RV64-NEXT: slli a2, a2, 32 -; RV64-NEXT: srli a2, a2, 32 -; RV64-NEXT: .LBB15_4: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; RV64-NEXT: vmv.s.x v8, a0 -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v2f64_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %d = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %a) store <2 x i32> %d, <2 x i32>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -15,139 +15,37 @@ declare @llvm.fptosi.sat.nxv4f32.nxv4i64() define @test_signed_v2f32_v2i32( %f) { -; CHECK32-LABEL: test_signed_v2f32_v2i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK32-NEXT: flw ft0, %lo(.LCPI0_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: 
vmnot.m v0, v9 -; CHECK32-NEXT: lui a0, %hi(.LCPI0_1) -; CHECK32-NEXT: flw ft0, %lo(.LCPI0_1)(a0) -; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vmfne.vv v8, v8, v8 -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vmv.v.v v0, v8 -; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v2f32_v2i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK64-NEXT: flw ft0, %lo(.LCPI0_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI0_1) -; CHECK64-NEXT: flw ft0, %lo(.LCPI0_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vmfne.vv v8, v8, v8 -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vmv.v.v v0, v8 -; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v2f32_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f32.nxv2i32( %f) ret %x } define @test_signed_v4f32_v4i32( %f) { -; CHECK32-LABEL: test_signed_v4f32_v4i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK32-NEXT: flw ft0, %lo(.LCPI1_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK32-NEXT: vmfge.vf v10, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v10 -; CHECK32-NEXT: lui a0, %hi(.LCPI1_1) -; CHECK32-NEXT: flw ft0, %lo(.LCPI1_1)(a0) -; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0 -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vmfne.vv v10, v8, v8 -; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0 -; CHECK32-NEXT: vmv1r.v v0, v10 -; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f32_v4i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64-NEXT: flw ft0, %lo(.LCPI1_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmfge.vf v10, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v10 -; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI1_1) -; CHECK64-NEXT: flw ft0, %lo(.LCPI1_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vmfne.vv v10, v8, v8 -; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 -; CHECK64-NEXT: vmv1r.v v0, v10 -; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i32( %f) ret %x } define @test_signed_v8f32_v8i32( %f) { -; CHECK32-LABEL: test_signed_v8f32_v8i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK32-NEXT: flw ft0, 
%lo(.LCPI2_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK32-NEXT: vmfge.vf v12, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v12 -; CHECK32-NEXT: lui a0, %hi(.LCPI2_1) -; CHECK32-NEXT: flw ft0, %lo(.LCPI2_1)(a0) -; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: vmerge.vxm v16, v12, a0, v0 -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vmfne.vv v12, v8, v8 -; CHECK32-NEXT: vmerge.vxm v8, v16, a0, v0 -; CHECK32-NEXT: vmv1r.v v0, v12 -; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v8f32_v8i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK64-NEXT: flw ft0, %lo(.LCPI2_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK64-NEXT: vmfge.vf v12, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v12 -; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI2_1) -; CHECK64-NEXT: flw ft0, %lo(.LCPI2_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vmerge.vxm v16, v12, a0, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vmfne.vv v12, v8, v8 -; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0 -; CHECK64-NEXT: vmv1r.v v0, v12 -; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i32( %f) ret %x } @@ -155,19 +53,11 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: lui a0, %hi(.LCPI3_1) -; CHECK-NEXT: flw ft1, %lo(.LCPI3_1)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v10, v8, ft0 -; CHECK-NEXT: vfmin.vf v10, v10, ft1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) ret %x @@ -176,136 +66,38 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: lui a0, %hi(.LCPI4_1) -; CHECK-NEXT: flw ft1, %lo(.LCPI4_1)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v12, v8, ft0 -; CHECK-NEXT: vfmin.vf v12, v12, ft1 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) ret %x } define @test_signed_v2f32_v2i64( %f) { -; CHECK32-LABEL: test_signed_v2f32_v2i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 
12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK32-NEXT: flw ft0, %lo(.LCPI5_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; CHECK32-NEXT: lui a0, %hi(.LCPI5_1) -; CHECK32-NEXT: flw ft0, %lo(.LCPI5_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v2f32_v2i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK64-NEXT: flw ft0, %lo(.LCPI5_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: lui a1, %hi(.LCPI5_1) -; CHECK64-NEXT: flw ft0, %lo(.LCPI5_1)(a1) -; CHECK64-NEXT: slli a1, a0, 63 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v2f32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f32.nxv2i64( %f) ret %x } define @test_signed_v4f32_v4i64( %f) { -; CHECK32-LABEL: test_signed_v4f32_v4i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK32-NEXT: flw ft0, %lo(.LCPI6_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK32-NEXT: vmfge.vf v10, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v10 -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; CHECK32-NEXT: lui a0, %hi(.LCPI6_1) -; CHECK32-NEXT: flw ft0, %lo(.LCPI6_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f32_v4i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK64-NEXT: flw ft0, %lo(.LCPI6_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmfge.vf v10, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v10 -; CHECK64-NEXT: 
vfwcvt.rtz.x.f.v v12, v8 -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: lui a1, %hi(.LCPI6_1) -; CHECK64-NEXT: flw ft0, %lo(.LCPI6_1)(a1) -; CHECK64-NEXT: slli a1, a0, 63 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i64( %f) ret %x } @@ -323,19 +115,11 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI7_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI7_0)(a0) -; CHECK-NEXT: lui a0, %hi(.LCPI7_1) -; CHECK-NEXT: fld ft1, %lo(.LCPI7_1)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v10, v8, ft0 -; CHECK-NEXT: vfmin.vf v10, v10, ft1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) ret %x @@ -344,19 +128,11 @@ define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI8_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: lui a0, %hi(.LCPI8_1) -; CHECK-NEXT: fld ft1, %lo(.LCPI8_1)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v12, v8, ft0 -; CHECK-NEXT: vfmin.vf v12, v12, ft1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) ret %x @@ -365,19 +141,11 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI9_0) -; CHECK-NEXT: fld ft0, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: lui a0, %hi(.LCPI9_1) -; CHECK-NEXT: fld ft1, %lo(.LCPI9_1)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v16, v8, ft0 -; CHECK-NEXT: vfmin.vf v16, v16, ft1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmerge.vim v8, v24, 0, v0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 +; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) ret %x @@ -430,107 +198,25 @@ } define 
@test_signed_v2f64_v2i64( %f) { -; CHECK32-LABEL: test_signed_v2f64_v2i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI12_0) -; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK32-NEXT: vmfge.vf v10, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v10 -; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 -; CHECK32-NEXT: lui a0, %hi(.LCPI12_1) -; CHECK32-NEXT: fld ft0, %lo(.LCPI12_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v2f64_v2i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmfge.vf v10, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v10 -; CHECK64-NEXT: vfcvt.rtz.x.f.v v10, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI12_1) -; CHECK64-NEXT: fld ft0, %lo(.LCPI12_1)(a0) -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: slli a1, a0, 63 -; CHECK64-NEXT: vmerge.vxm v12, v10, a1, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vmfne.vv v10, v8, v8 -; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 -; CHECK64-NEXT: vmv1r.v v0, v10 -; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v2f64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i64( %f) ret %x } define @test_signed_v4f64_v4i64( %f) { -; CHECK32-LABEL: test_signed_v4f64_v4i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI13_0) -; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK32-NEXT: vmfge.vf v12, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v12 -; CHECK32-NEXT: vfcvt.rtz.x.f.v v12, v8 -; CHECK32-NEXT: lui a0, %hi(.LCPI13_1) -; CHECK32-NEXT: fld ft0, %lo(.LCPI13_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f64_v4i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmfge.vf v12, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v12 -; CHECK64-NEXT: vfcvt.rtz.x.f.v v12, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI13_1) -; CHECK64-NEXT: fld ft0, %lo(.LCPI13_1)(a0) -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: slli a1, 
a0, 63 -; CHECK64-NEXT: vmerge.vxm v16, v12, a1, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vmfne.vv v12, v8, v8 -; CHECK64-NEXT: vmerge.vxm v8, v16, a0, v0 -; CHECK64-NEXT: vmv1r.v v0, v12 -; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f64_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i64( %f) ret %x } @@ -547,378 +233,98 @@ declare @llvm.fptosi.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { -; CHECK32-LABEL: test_signed_v2f16_v2i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI14_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: lui a1, %hi(.LCPI14_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI14_1)(a1) -; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v2f16_v2i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI14_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI14_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI14_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v2f16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) ret %x } define @test_signed_v4f16_v4i32( %f) { -; CHECK32-LABEL: test_signed_v4f16_v4i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI15_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI15_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: lui a1, %hi(.LCPI15_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI15_1)(a1) -; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; 
CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK32-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f16_v4i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI15_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI15_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI15_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI15_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f16_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) ret %x } define @test_signed_v8f16_v8i32( %f) { -; CHECK32-LABEL: test_signed_v8f16_v8i32: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI16_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI16_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK32-NEXT: vmfge.vf v10, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v10 -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: lui a1, %hi(.LCPI16_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI16_1)(a1) -; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK32-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v8f16_v8i32: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI16_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI16_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK64-NEXT: vmfge.vf v10, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v10 -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; CHECK64-NEXT: lui a0, %hi(.LCPI16_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI16_1)(a0) -; CHECK64-NEXT: li a0, 1 -; CHECK64-NEXT: slli a0, a0, 31 -; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: lui a0, 524288 -; CHECK64-NEXT: addiw a0, a0, -1 
-; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v8f16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) ret %x } define @test_signed_v4f16_v4i16( %f) { -; CHECK32-LABEL: test_signed_v4f16_v4i16: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI17_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: lui a0, %hi(.LCPI17_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI17_1)(a0) -; CHECK32-NEXT: vfcvt.rtz.x.f.v v9, v8 -; CHECK32-NEXT: lui a0, 8 -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vmfne.vv v8, v8, v8 -; CHECK32-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK32-NEXT: vmv.v.v v0, v8 -; CHECK32-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f16_v4i16: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI17_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: lui a0, %hi(.LCPI17_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI17_1)(a0) -; CHECK64-NEXT: vfcvt.rtz.x.f.v v9, v8 -; CHECK64-NEXT: lui a0, 8 -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vmfne.vv v8, v8, v8 -; CHECK64-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK64-NEXT: vmv.v.v v0, v8 -; CHECK64-NEXT: vmerge.vim v8, v9, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f16_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) ret %x } define @test_signed_v8f16_v8i16( %f) { -; CHECK32-LABEL: test_signed_v8f16_v8i16: -; CHECK32: # %bb.0: -; CHECK32-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI18_0)(a0) -; CHECK32-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK32-NEXT: vmfge.vf v10, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v10 -; CHECK32-NEXT: lui a0, %hi(.LCPI18_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI18_1)(a0) -; CHECK32-NEXT: vfcvt.rtz.x.f.v v10, v8 -; CHECK32-NEXT: lui a0, 8 -; CHECK32-NEXT: vmerge.vxm v12, v10, a0, v0 -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: vmfne.vv v10, v8, v8 -; CHECK32-NEXT: vmerge.vxm v8, v12, a0, v0 -; CHECK32-NEXT: vmv1r.v v0, v10 -; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v8f16_v8i16: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI18_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK64-NEXT: vmfge.vf v10, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v10 -; CHECK64-NEXT: lui a0, %hi(.LCPI18_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI18_1)(a0) -; CHECK64-NEXT: 
vfcvt.rtz.x.f.v v10, v8 -; CHECK64-NEXT: lui a0, 8 -; CHECK64-NEXT: vmerge.vxm v12, v10, a0, v0 -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: addiw a0, a0, -1 -; CHECK64-NEXT: vmfne.vv v10, v8, v8 -; CHECK64-NEXT: vmerge.vxm v8, v12, a0, v0 -; CHECK64-NEXT: vmv1r.v v0, v10 -; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v8f16_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) ret %x } define @test_signed_v2f16_v2i64( %f) { -; CHECK32-LABEL: test_signed_v2f16_v2i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: .cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI19_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: vfwcvt.f.f.v v9, v8 -; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK32-NEXT: lui a0, %hi(.LCPI19_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI19_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v10, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v2f16_v2i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI19_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfwcvt.f.f.v v9, v8 -; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: lui a1, %hi(.LCPI19_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI19_1)(a1) -; CHECK64-NEXT: slli a1, a0, 63 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a1, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vxm v10, v10, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v2f16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) ret %x } define @test_signed_v4f16_v4i64( %f) { -; CHECK32-LABEL: test_signed_v4f16_v4i64: -; CHECK32: # %bb.0: -; CHECK32-NEXT: addi sp, sp, -16 -; CHECK32-NEXT: 
.cfi_def_cfa_offset 16 -; CHECK32-NEXT: lui a0, 524288 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: sw zero, 8(sp) -; CHECK32-NEXT: li a1, -1 -; CHECK32-NEXT: sw a1, 8(sp) -; CHECK32-NEXT: lui a1, %hi(.LCPI20_0) -; CHECK32-NEXT: flh ft0, %lo(.LCPI20_0)(a1) -; CHECK32-NEXT: addi a0, a0, -1 -; CHECK32-NEXT: sw a0, 12(sp) -; CHECK32-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfge.vf v9, v8, ft0 -; CHECK32-NEXT: vmnot.m v0, v9 -; CHECK32-NEXT: vfwcvt.f.f.v v10, v8 -; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK32-NEXT: vfwcvt.rtz.x.f.v v12, v10 -; CHECK32-NEXT: lui a0, %hi(.LCPI20_1) -; CHECK32-NEXT: flh ft0, %lo(.LCPI20_1)(a0) -; CHECK32-NEXT: addi a0, sp, 8 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK32-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK32-NEXT: vlse64.v v12, (a0), zero, v0.t -; CHECK32-NEXT: vmfne.vv v0, v8, v8 -; CHECK32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK32-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK32-NEXT: addi sp, sp, 16 -; CHECK32-NEXT: ret -; -; CHECK64-LABEL: test_signed_v4f16_v4i64: -; CHECK64: # %bb.0: -; CHECK64-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK64-NEXT: flh ft0, %lo(.LCPI20_0)(a0) -; CHECK64-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfge.vf v9, v8, ft0 -; CHECK64-NEXT: vmnot.m v0, v9 -; CHECK64-NEXT: vfwcvt.f.f.v v10, v8 -; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK64-NEXT: vfwcvt.rtz.x.f.v v12, v10 -; CHECK64-NEXT: li a0, -1 -; CHECK64-NEXT: lui a1, %hi(.LCPI20_1) -; CHECK64-NEXT: flh ft0, %lo(.LCPI20_1)(a1) -; CHECK64-NEXT: slli a1, a0, 63 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a1, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfgt.vf v0, v8, ft0 -; CHECK64-NEXT: srli a0, a0, 1 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vxm v12, v12, a0, v0 -; CHECK64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK64-NEXT: vmfne.vv v0, v8, v8 -; CHECK64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK64-NEXT: vmerge.vim v8, v12, 0, v0 -; CHECK64-NEXT: ret +; CHECK-LABEL: test_signed_v4f16_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i64( %f) ret %x } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK32: {{.*}} +; CHECK64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -17,17 +17,10 @@ define @test_signed_v2f32_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vmfgt.vf v9, v8, ft0 -; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: vmfge.vf v10, v8, ft0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) ret %x @@ -36,17 +29,10 @@ define @test_signed_v4f32_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfgt.vf v10, v8, ft0 -; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: vmfge.vf v11, v8, ft0 -; CHECK-NEXT: vmnot.m v0, v11 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i32( %f) ret %x @@ -55,17 +41,10 @@ define @test_signed_v8f32_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfgt.vf v12, v8, ft0 -; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: vmfge.vf v13, v8, ft0 -; CHECK-NEXT: vmnot.m v0, v13 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i32( %f) ret %x @@ -74,14 +53,11 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI3_0)(a0) -; CHECK-NEXT: fmv.w.x ft1, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft1 -; CHECK-NEXT: vfmin.vf v10, v8, ft0 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) ret %x @@ -90,14 +66,11 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a0) -; CHECK-NEXT: fmv.w.x ft1, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft1 -; CHECK-NEXT: vfmin.vf v12, v8, ft0 +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) ret %x @@ -106,18 +79,11 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # 
%bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flw ft0, %lo(.LCPI5_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.w.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
   ret <vscale x 2 x i64> %x
@@ -126,18 +92,11 @@
 define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0
-; CHECK-NEXT: fmv.w.x ft0, zero
-; CHECK-NEXT: vmfge.vf v11, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v11
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vmerge.vim v12, v12, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
   ret <vscale x 4 x i64> %x
@@ -154,85 +113,40 @@
 declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)

 define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
-; CHECK32-LABEL: test_signed_v2f64_v2i32:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: lui a0, %hi(.LCPI7_0)
-; CHECK32-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
-; CHECK32-NEXT: fcvt.d.w ft1, zero
-; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK32-NEXT: vfmax.vf v8, v8, ft1
-; CHECK32-NEXT: vfmin.vf v10, v8, ft0
-; CHECK32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v10
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: test_signed_v2f64_v2i32:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: lui a0, %hi(.LCPI7_0)
-; CHECK64-NEXT: fld ft0, %lo(.LCPI7_0)(a0)
-; CHECK64-NEXT: fmv.d.x ft1, zero
-; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK64-NEXT: vfmax.vf v8, v8, ft1
-; CHECK64-NEXT: vfmin.vf v10, v8, ft0
-; CHECK64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v10
-; CHECK64-NEXT: ret
+; CHECK-LABEL: test_signed_v2f64_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
+; CHECK-NEXT: ret
   %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
   ret <vscale x 2 x i32> %x
 }

 define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
-; CHECK32-LABEL: test_signed_v4f64_v4i32:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: lui a0, %hi(.LCPI8_0)
-; CHECK32-NEXT: fld ft0, %lo(.LCPI8_0)(a0)
-; CHECK32-NEXT: fcvt.d.w ft1, zero
-; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK32-NEXT: vfmax.vf v8, v8, ft1
-; CHECK32-NEXT: vfmin.vf v12, v8, ft0
-; CHECK32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v12
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: test_signed_v4f64_v4i32:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: lui a0, %hi(.LCPI8_0)
-; CHECK64-NEXT: fld ft0, %lo(.LCPI8_0)(a0)
-; CHECK64-NEXT: fmv.d.x ft1, zero
-; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK64-NEXT: vfmax.vf v8, v8, ft1
-; CHECK64-NEXT: vfmin.vf v12, v8, ft0
-; CHECK64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v12
-; CHECK64-NEXT: ret
+; CHECK-LABEL: test_signed_v4f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
+; CHECK-NEXT: ret
   %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
   ret <vscale x 4 x i32> %x
 }

 define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
-; CHECK32-LABEL: test_signed_v8f64_v8i32:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: lui a0, %hi(.LCPI9_0)
-; CHECK32-NEXT: fld ft0, %lo(.LCPI9_0)(a0)
-; CHECK32-NEXT: fcvt.d.w ft1, zero
-; CHECK32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; CHECK32-NEXT: vfmax.vf v8, v8, ft1
-; CHECK32-NEXT: vfmin.vf v16, v8, ft0
-; CHECK32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK32-NEXT: vfncvt.rtz.xu.f.w v8, v16
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: test_signed_v8f64_v8i32:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: lui a0, %hi(.LCPI9_0)
-; CHECK64-NEXT: fld ft0, %lo(.LCPI9_0)(a0)
-; CHECK64-NEXT: fmv.d.x ft1, zero
-; CHECK64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; CHECK64-NEXT: vfmax.vf v8, v8, ft1
-; CHECK64-NEXT: vfmin.vf v16, v8, ft0
-; CHECK64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK64-NEXT: vfncvt.rtz.xu.f.w v8, v16
-; CHECK64-NEXT: ret
+; CHECK-LABEL: test_signed_v8f64_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT: vmerge.vim v8, v16, 0, v0
+; CHECK-NEXT: ret
   %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
   ret <vscale x 8 x i32> %x
 }
@@ -302,69 +216,25 @@
 }

 define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
-; CHECK32-LABEL: test_signed_v2f64_v2i64:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK32-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
-; CHECK32-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK32-NEXT: vmfgt.vf v10, v8, ft0
-; CHECK32-NEXT: fcvt.d.w ft0, zero
-; CHECK32-NEXT: vmfge.vf v11, v8, ft0
-; CHECK32-NEXT: vmnot.m v0, v11
-; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK32-NEXT: vmv1r.v v0, v10
-; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: test_signed_v2f64_v2i64:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK64-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
-; CHECK64-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK64-NEXT: vmfgt.vf v10, v8, ft0
-; CHECK64-NEXT: fmv.d.x ft0, zero
-; CHECK64-NEXT: vmfge.vf v11, v8, ft0
-; CHECK64-NEXT: vmnot.m v0, v11
-; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK64-NEXT: vmv1r.v v0, v10
-; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0
-; CHECK64-NEXT: ret
+; CHECK-LABEL: test_signed_v2f64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
   %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
   ret <vscale x 2 x i64> %x
 }

 define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
-; CHECK32-LABEL: test_signed_v4f64_v4i64:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK32-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
-; CHECK32-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK32-NEXT: vmfgt.vf v12, v8, ft0
-; CHECK32-NEXT: fcvt.d.w ft0, zero
-; CHECK32-NEXT: vmfge.vf v13, v8, ft0
-; CHECK32-NEXT: vmnot.m v0, v13
-; CHECK32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK32-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK32-NEXT: vmv1r.v v0, v12
-; CHECK32-NEXT: vmerge.vim v8, v8, -1, v0
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: test_signed_v4f64_v4i64:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK64-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
-; CHECK64-NEXT: vsetvli a0, zero, e64, m4, ta, mu
-; CHECK64-NEXT: vmfgt.vf v12, v8, ft0
-; CHECK64-NEXT: fmv.d.x ft0, zero
-; CHECK64-NEXT: vmfge.vf v13, v8, ft0
-; CHECK64-NEXT: vmnot.m v0, v13
-; CHECK64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK64-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK64-NEXT: vmv1r.v v0, v12
-; CHECK64-NEXT: vmerge.vim v8, v8, -1, v0
-; CHECK64-NEXT: ret
+; CHECK-LABEL: test_signed_v4f64_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: ret
   %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
   ret <vscale x 4 x i64> %x
 }
@@ -383,18 +253,11 @@
 define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v9, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
   ret <vscale x 2 x i32> %x
@@ -403,18 +266,11 @@
 define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI15_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI15_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
   ret <vscale x 4 x i32> %x
@@ -423,18 +279,11 @@
 define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI16_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI16_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v11, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v11
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vmerge.vim v12, v12, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
   ret <vscale x 8 x i32> %x
@@ -443,17 +292,10 @@
 define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI17_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT: vmv.v.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
   ret <vscale x 4 x i16> %x
@@ -462,17 +304,10 @@
 define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI18_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v11, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v11
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
   ret <vscale x 8 x i16> %x
@@ -481,20 +316,13 @@
 define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI19_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
-; CHECK-NEXT: vfwcvt.f.f.v v10, v8
+; CHECK-NEXT: vmfne.vv v0, v8, v8
+; CHECK-NEXT: vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10
+; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9
 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vmerge.vim v10, v12, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
   ret <vscale x 2 x i64> %x
@@ -503,20 +331,13 @@
 define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
-; CHECK-NEXT: flh ft0, %lo(.LCPI20_0)(a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v9, v8, ft0
-; CHECK-NEXT: fmv.h.x ft0, zero
-; CHECK-NEXT: vmfge.vf v10, v8, ft0
-; CHECK-NEXT: vmnot.m v0, v10
+; CHECK-NEXT: vmfne.vv v0, v8, v8
 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vmerge.vim v12, v12, 0, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v8, v12, -1, v0
+; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
 ; CHECK-NEXT: ret
   %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
   ret <vscale x 4 x i64> %x