diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -236,16 +236,17 @@
   FABS_VL,
   FSQRT_VL,
   FCOPYSIGN_VL, // Has a merge operand
-  VFCVT_RTZ_X_F_VL,
   VFCVT_RTZ_XU_F_VL,
-  VFCVT_X_F_VL,
+  VFCVT_RTZ_X_F_VL,
   VFCVT_XU_F_VL,
+  VFCVT_X_F_VL,
   VFROUND_NOEXCEPT_VL,
-  VFCVT_RM_X_F_VL, // Has a rounding mode operand.
   VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
+  VFCVT_RM_X_F_VL, // Has a rounding mode operand.
   SINT_TO_FP_VL,
   UINT_TO_FP_VL,
   VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
+  VFCVT_RM_F_X_VL, // Has a rounding mode operand.
   FP_ROUND_VL,
   FP_EXTEND_VL,
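The VFCVT_RM_* nodes above carry the rounding mode as an explicit operand instead of reading the dynamic FRM register. With a static-rounding-mode variant available for every conversion direction, the fp-to-int combine below only needs to check that the element width changes by at most one 2x step, since vfwcvt/vfncvt widen or narrow by exactly one step. A minimal sketch of that check, assuming power-of-two element widths (variable names here are illustrative, not the patch's exact code):

    unsigned DstBits = VT.getScalarSizeInBits();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();
    bool OneStep = DstBits == SrcBits ||     // vfcvt:  same width
                   DstBits == SrcBits * 2 || // vfwcvt: one widening step
                   DstBits * 2 == SrcBits;   // vfncvt: one narrowing step
    if (!OneStep)
      return SDValue();

For power-of-two widths this is equivalent to the pair of greater-than-2x comparisons used in the hunk below.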
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9580,9 +9580,12 @@
   MVT ContainerVT = VT.getSimpleVT();
   SDValue XVal = Src.getOperand(0);
 
-  // TODO: Support combining with widening and narrowing instructions
-  // For now only support conversions of the same bit size
-  if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
+  // Widening and narrowing conversions are also combined into a plain
+  // VFCVT_..._VL node, as there are no separate VFWCVT/VFNCVT VL nodes; they
+  // end up being lowered to the appropriate pseudo instructions based on
+  // their operand types.
+  if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
+      VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
     return SDValue();
 
   // Make fixed-length vectors scalable first
@@ -11636,6 +11639,11 @@
     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
   case RISCV::PseudoQuietFLT_D:
     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
+
+  // =========================================================================
+  // VFCVT
+  // =========================================================================
+
   case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
@@ -11648,6 +11656,7 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
+
   case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
   case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
@@ -11660,6 +11669,7 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
+
   case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
@@ -11672,6 +11682,102 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
   case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
+
+  case RISCV::PseudoVFCVT_RM_F_X_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M1_MASK);
+  case RISCV::PseudoVFCVT_RM_F_X_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_X_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M4_MASK);
+  case RISCV::PseudoVFCVT_RM_F_X_V_M8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M8_MASK);
+  case RISCV::PseudoVFCVT_RM_F_X_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
+  case RISCV::PseudoVFCVT_RM_F_X_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
+
+  // =========================================================================
+  // VFWCVT
+  // =========================================================================
+
+  case RISCV::PseudoVFWCVT_RM_XU_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M1_MASK);
+  case RISCV::PseudoVFWCVT_RM_XU_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M2_MASK);
+  case RISCV::PseudoVFWCVT_RM_XU_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M4_MASK);
+  case RISCV::PseudoVFWCVT_RM_XU_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_MF2_MASK);
+  case RISCV::PseudoVFWCVT_RM_XU_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_MF4_MASK);
+
+  case RISCV::PseudoVFWCVT_RM_X_F_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
+  case RISCV::PseudoVFWCVT_RM_X_F_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
+  case RISCV::PseudoVFWCVT_RM_X_F_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
+  case RISCV::PseudoVFWCVT_RM_X_F_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
+  case RISCV::PseudoVFWCVT_RM_X_F_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
+
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_XU_V_MF8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
+
+  case RISCV::PseudoVFWCVT_RM_F_X_V_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M1_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_X_V_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M2_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_X_V_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M4_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_X_V_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF2_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_X_V_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF4_MASK);
+  case RISCV::PseudoVFWCVT_RM_F_X_V_MF8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF8_MASK);
+
+  // =========================================================================
+  // VFNCVT
+  // =========================================================================
+
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M1_MASK);
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M2_MASK);
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M4_MASK);
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF2_MASK);
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF4_MASK);
+  case RISCV::PseudoVFNCVT_RM_XU_F_W_MF8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF8_MASK);
+
+  case RISCV::PseudoVFNCVT_RM_X_F_W_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
+  case RISCV::PseudoVFNCVT_RM_X_F_W_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
+  case RISCV::PseudoVFNCVT_RM_X_F_W_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
+  case RISCV::PseudoVFNCVT_RM_X_F_W_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
+  case RISCV::PseudoVFNCVT_RM_X_F_W_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
+  case RISCV::PseudoVFNCVT_RM_X_F_W_MF8_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF8_MASK);
+
   case RISCV::PseudoVFNCVT_RM_F_XU_W_M1_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
   case RISCV::PseudoVFNCVT_RM_F_XU_W_M2_MASK:
@@ -11682,6 +11788,18 @@
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
   case RISCV::PseudoVFNCVT_RM_F_XU_W_MF4_MASK:
     return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
+
+  case RISCV::PseudoVFNCVT_RM_F_X_W_M1_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M1_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_X_W_M2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M2_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_X_W_M4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M4_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_X_W_MF2_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_MF2_MASK);
+  case RISCV::PseudoVFNCVT_RM_F_X_W_MF4_MASK:
+    return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_MF4_MASK);
+
   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                      RISCV::PseudoVFCVT_F_X_V_M1_MASK);
@@ -13281,6 +13399,7 @@
   NODE_NAME_CASE(SINT_TO_FP_VL)
   NODE_NAME_CASE(UINT_TO_FP_VL)
   NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
+  NODE_NAME_CASE(VFCVT_RM_F_X_VL)
   NODE_NAME_CASE(FP_EXTEND_VL)
   NODE_NAME_CASE(FP_ROUND_VL)
   NODE_NAME_CASE(VWMUL_VL)
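Every *_RM_*_MASK pseudo above funnels into emitVFCVT_RM_MASK, the custom inserter that swaps FRM to the pseudo's immediate rounding mode, emits the underlying masked conversion, and restores the old FRM afterwards; that is what produces the fsrmi/fsrm pairs in the test diffs further down (fsrmi a0, 3 selects RUP, round-up, which implements ceil). A scalar C++ analogy of the same dance (illustrative only, not the inserter's actual code):

    #include <cfenv>
    #include <cmath>

    // ceil(x) folded into a single float->int conversion performed under a
    // temporarily swapped rounding mode, mirroring
    //   fsrmi a0, 3 ; vfncvt.x.f.w ... ; fsrm a0
    long CeilToLong(double X) {
      int OldMode = std::fegetround(); // fsrmi also hands back the old FRM
      std::fesetround(FE_UPWARD);      // 3 encodes RUP in FRM
      long Res = std::lrint(X);        // converts using the current mode
      std::fesetround(OldMode);        // fsrm: restore the caller's mode
      return Res;
    }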
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3435,6 +3435,18 @@
   }
 }
 
+multiclass VPseudoVWCVTI_RM_V {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxListFW in {
+    defvar mx = m.MX;
+    defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
+    defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
+
+    defm _V : VPseudoConversionRM,
+              Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
+  }
+}
+
 multiclass VPseudoVWCVTF_V {
   defvar constraint = "@earlyclobber $rd";
   foreach m = MxListW in {
@@ -3447,6 +3459,18 @@
   }
 }
 
+multiclass VPseudoVWCVTF_RM_V {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxListW in {
+    defvar mx = m.MX;
+    defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
+    defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
+
+    defm _V : VPseudoConversionRM,
+              Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
+  }
+}
+
 multiclass VPseudoVWCVTD_V {
$rd"; foreach m = MxListFW in { @@ -3471,6 +3495,18 @@ } } +multiclass VPseudoVNCVTI_RM_W { + defvar constraint = "@earlyclobber $rd"; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVFNCvtFToIV_MX = !cast("WriteVFNCvtFToIV_" # mx); + defvar ReadVFNCvtFToIV_MX = !cast("ReadVFNCvtFToIV_" # mx); + + defm _W : VPseudoConversionRM, + Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>; + } +} + multiclass VPseudoVNCVTF_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -5510,16 +5546,20 @@ defm PseudoVFCVT_XU_F : VPseudoVCVTI_V; defm PseudoVFCVT_X_F : VPseudoVCVTI_V; } -defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V; -defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V; + defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V; defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V; + +defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V; +defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V; + defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V; let Uses = [FRM] in { defm PseudoVFCVT_F_XU : VPseudoVCVTF_V; defm PseudoVFCVT_F_X : VPseudoVCVTF_V; } defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V; +defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V; } // mayRaiseFPException = true //===----------------------------------------------------------------------===// @@ -5530,10 +5570,19 @@ defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V; defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V; } +defm PseudoVFWCVT_RM_XU_F : VPseudoVWCVTI_RM_V; +defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V; + defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V; defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V; + +let Uses = [FRM] in { defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V; defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V; +} +defm PseudoVFWCVT_RM_F_XU : VPseudoVWCVTF_RM_V; +defm PseudoVFWCVT_RM_F_X : VPseudoVWCVTF_RM_V; + defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V; } // mayRaiseFPException = true @@ -5545,15 +5594,23 @@ defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W; defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W; } +defm PseudoVFNCVT_RM_XU_F : VPseudoVNCVTI_RM_W; +defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W; + defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W; defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W; + let Uses = [FRM] in { defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W; defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W; -defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W; } -defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W; +defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W; + +let Uses = [FRM] in +defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W; + +defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true } // Predicates = [HasVInstructionsAnyF] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -136,6 +136,12 @@ SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT> ]>; +def SDT_RISCVFP2IOp_RM_VL : SDTypeProfile<1, 4, [ + SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>, + SDTCisVT<4, XLenVT> // Rounding mode +]>; + def SDT_RISCVI2FPOp_VL : SDTypeProfile<1, 3, [ SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT> @@ -143,25 +149,25 @@ def SDT_RISCVI2FPOp_RM_VL : SDTypeProfile<1, 4, [ SDTCisFP<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<2, i1>, 
   SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
-  SDTCisVT<4, XLenVT>
+  SDTCisVT<4, XLenVT> // Rounding mode
 ]>;
 
-def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
+// Float -> Int
+def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVFP2IOp_RM_VL>;
+def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVFP2IOp_RM_VL>;
+
 def riscv_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
+
+// Int -> Float
 def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_vfcvt_rm_f_xu_vl : SDNode<"RISCVISD::VFCVT_RM_F_XU_VL", SDT_RISCVI2FPOp_RM_VL>;
+def riscv_vfcvt_rm_f_x_vl : SDNode<"RISCVISD::VFCVT_RM_F_X_VL", SDT_RISCVI2FPOp_RM_VL>;
 
-def SDT_RISCVVecCvtF2XOp_VL : SDTypeProfile<1, 4, [
-  SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
-  SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
-  SDTCisVT<4, XLenVT>
-]>;
-def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
-def riscv_vfcvt_rm_xu_f_vl : SDNode<"RISCVISD::VFCVT_RM_XU_F_VL", SDT_RISCVVecCvtF2XOp_VL>;
-def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
-def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
 def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
 
 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
@@ -767,6 +773,8 @@
   }
 }
 
+// Single-width conversions
+
 multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
   foreach fvti = AllFloatVectors in {
     defvar ivti = GetIntVTypeInfo<fvti>.Vti;
@@ -816,6 +824,8 @@
   }
 }
 
+// Widening conversions
+
 multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
@@ -829,6 +839,19 @@
   }
 }
 
+multiclass VPatWConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach fvtiToFWti = AllWidenableFloatVectors in {
+    defvar fvti = fvtiToFWti.Vti;
+    defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+    def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+                                (fvti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+                  (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+                  (fvti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
+  }
+}
+
 multiclass VPatWConvertI2FPVL_V<SDNode vop, string instruction_name> {
   foreach vtiToWti = AllWidenableIntToFloatVectors in {
     defvar ivti = vtiToWti.Vti;
@@ -842,7 +865,24 @@
   }
 }
 
-multiclass VPatNConvertFP2IVL_V<SDNode vop, string instruction_name> {
+multiclass VPatWConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+  foreach vtiToWti = AllWidenableIntToFloatVectors in {
+    defvar ivti = vtiToWti.Vti;
+    defvar fwti = vtiToWti.Wti;
+    def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
+                                (ivti.Mask V0), (XLenVT timm:$frm),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+                  (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+                  (ivti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW, TA_MA)>;
+  }
+}
+
+// Narrowing conversions
+
+multiclass VPatNConvertFP2IVL_W<SDNode vop, string instruction_name> {
+  // Reuse the same list of types used in the widening nodes, but just swap the
+  // direction of types around so we're converting from Wti -> Vti.
   foreach vtiToWti = AllWidenableIntToFloatVectors in {
     defvar vti = vtiToWti.Vti;
     defvar fwti = vtiToWti.Wti;
@@ -855,7 +895,20 @@
   }
 }
 
-multiclass VPatNConvertI2FPVL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertFP2I_RM_VL_W<SDNode vop, string instruction_name> {
+  foreach vtiToWti = AllWidenableIntToFloatVectors in {
+    defvar vti = vtiToWti.Vti;
+    defvar fwti = vtiToWti.Wti;
+    def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
+                               (fwti.Mask V0), (XLenVT timm:$frm),
+                               VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX#"_MASK")
+                  (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+                  (fwti.Mask V0), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>;
+  }
+}
+
+multiclass VPatNConvertI2FPVL_W<SDNode vop, string instruction_name> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
     defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
@@ -868,7 +921,7 @@
   }
 }
 
-multiclass VPatNConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertI2FP_RM_VL_W<SDNode vop, string instruction_name> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
     defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
@@ -1740,21 +1793,35 @@
                GPR:$vl, fvti.Log2SEW)>;
 
   // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
-  defm : VPatConvertFP2I_RM_VL_V;
-  defm : VPatConvertFP2I_RM_VL_V;
-  defm : VPatConvertFP2IVL_V;
   defm : VPatConvertFP2IVL_V;
-  defm : VPatConvertFP2IVL_V;
+  defm : VPatConvertFP2IVL_V;
+  defm : VPatConvertFP2I_RM_VL_V;
+  defm : VPatConvertFP2I_RM_VL_V;
+
   defm : VPatConvertFP2IVL_V;
-  defm : VPatConvertI2FPVL_V;
+  defm : VPatConvertFP2IVL_V;
+
+  defm : VPatConvertI2FPVL_V;
+  defm : VPatConvertI2FPVL_V;
+
+  defm : VPatConvertI2FP_RM_VL_V;
+  defm : VPatConvertI2FP_RM_VL_V;
 
   // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-  defm : VPatWConvertFP2IVL_V;
+  defm : VPatWConvertFP2IVL_V;
+  defm : VPatWConvertFP2IVL_V;
+  defm : VPatWConvertFP2I_RM_VL_V;
+  defm : VPatWConvertFP2I_RM_VL_V;
+
   defm : VPatWConvertFP2IVL_V;
-  defm : VPatWConvertI2FPVL_V;
+  defm : VPatWConvertFP2IVL_V;
+
+  defm : VPatWConvertI2FPVL_V;
+  defm : VPatWConvertI2FPVL_V;
+
+  defm : VPatWConvertI2FP_RM_VL_V;
+  defm : VPatWConvertI2FP_RM_VL_V;
+
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
     defvar fwti = fvtiToFWti.Wti;
@@ -1767,12 +1834,20 @@
   }
 
   // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
-  defm : VPatNConvertFP2IVL_V;
-  defm : VPatNConvertFP2IVL_V;
-  defm : VPatNConvertI2FPVL_V;
-  defm : VPatNConvertI2FPVL_V;
-  defm : VPatNConvertI2FP_RM_VL_V;
+  defm : VPatNConvertFP2IVL_W;
+  defm : VPatNConvertFP2IVL_W;
+  defm : VPatNConvertFP2I_RM_VL_W;
+  defm : VPatNConvertFP2I_RM_VL_W;
+
+  defm : VPatNConvertFP2IVL_W;
+  defm : VPatNConvertFP2IVL_W;
+
+  defm : VPatNConvertI2FPVL_W;
+  defm : VPatNConvertI2FPVL_W;
+
+  defm : VPatNConvertI2FP_RM_VL_W;
+  defm : VPatNConvertI2FP_RM_VL_W;
+
   foreach fvtiToFWti = AllWidenableFloatVectors in {
     defvar fvti = fvtiToFWti.Vti;
     defvar fwti = fvtiToFWti.Wti;
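The test churn below all falls out of one observation: a float-to-int conversion applies a rounding mode of its own, so rounding to an integral float first (the vfabs/vmflt/vfcvt/vfcvt/vfsgnj sequence) and then converting is redundant. For trunc, fptosi/fptoui already round toward zero, so the whole sequence collapses into a single vfncvt.rtz/vfwcvt.rtz; for ceil, the conversion instead runs under a swapped static rounding mode. The trunc case in scalar C++ terms (illustrative only):

    #include <cmath>

    // Before: round to an integral double, then convert -- two rounding steps.
    long TruncToLongOld(double X) { return static_cast<long>(std::trunc(X)); }

    // After: the conversion itself truncates toward zero, exactly like
    // vfncvt.rtz.x.f.w / vfwcvt.rtz.x.f.v, so the pre-rounding is dropped.
    long TruncToLongNew(double X) { return static_cast<long>(X); }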
diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll
@@ -177,32 +177,14 @@
 define <vscale x 1 x i32> @trunc_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI4_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI4_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -214,32 +196,14 @@
 define <vscale x 1 x i32> @trunc_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: trunc_nxv1f64_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI5_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI5_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -455,32 +419,14 @@
 define <vscale x 4 x i32> @trunc_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
@@ -492,32 +438,14 @@
 define <vscale x 4 x i32> @trunc_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: trunc_nxv4f64_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v12, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
@@ -749,37 +677,21 @@
 define <vscale x 1 x i32> @ceil_nxv1f64_to_si32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: ceil_nxv1f64_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI20_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI20_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
@@ -790,37 +702,21 @@
 define <vscale x 1 x i32> @ceil_nxv1f64_to_ui32(<vscale x 1 x double> %x) {
 ; RV32-LABEL: ceil_nxv1f64_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI21_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI21_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
@@ -1063,37 +959,21 @@
 define <vscale x 4 x i32> @ceil_nxv4f64_to_si32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: ceil_nxv4f64_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI28_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v12, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI28_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v12, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
@@ -1104,37 +984,21 @@
 define <vscale x 4 x i32> @ceil_nxv4f64_to_ui32(<vscale x 4 x double> %x) {
 ; RV32-LABEL: ceil_nxv4f64_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV32-NEXT:    fld ft0, %lo(.LCPI29_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vfabs.v v12, v8
-; RV32-NEXT:    vmflt.vf v0, v12, ft0
+; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v12, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; RV32-NEXT:    vmv.v.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f64_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV64-NEXT:    fld ft0, %lo(.LCPI29_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV64-NEXT:    vfabs.v v12, v8
-; RV64-NEXT:    vmflt.vf v0, v12, ft0
+; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v12, v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v12, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; RV64-NEXT:    vmv.v.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
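One artifact worth noting in the ceil tests: the rounding-mode conversion only exists as a masked pseudo, so the lowering feeds it an all-ones mask, which the tests materialize with vmset.m v0 in place of the old vmflt.vf comparison mask. Roughly how such a node might be built during lowering (a sketch under assumed surrounding variables — Src, VL, MaskVT, ContainerVT — not the patch's literal code):

    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    SDValue RM = DAG.getTargetConstant(RISCVFPRndMode::RUP, DL,
                                       Subtarget.getXLenVT());
    SDValue Res = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT,
                              Src, Mask, RM, VL);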
diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll
@@ -91,32 +91,14 @@
 define <vscale x 1 x i16> @trunc_nxv1f32_to_si16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI2_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI2_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI2_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI2_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -128,32 +110,14 @@
 define <vscale x 1 x i16> @trunc_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -199,30 +163,14 @@
 define <vscale x 1 x i64> @trunc_nxv1f32_to_si64(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -234,30 +182,14 @@
 define <vscale x 1 x i64> @trunc_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
 ; RV32-LABEL: trunc_nxv1f32_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f32_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -353,32 +285,14 @@
 define <vscale x 4 x i16> @trunc_nxv4f32_to_si16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
@@ -390,32 +304,14 @@
 define <vscale x 4 x i16> @trunc_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
@@ -461,30 +357,14 @@
 define <vscale x 4 x i64> @trunc_nxv4f32_to_si64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI14_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI14_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.x.f.v v12, v8
 ; RV64-NEXT:    vmv4r.v v8, v12
 ; RV64-NEXT:    ret
@@ -496,30 +376,14 @@
 define <vscale x 4 x i64> @trunc_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: trunc_nxv4f32_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f32_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
 ; RV64-NEXT:    vmv4r.v v8, v12
 ; RV64-NEXT:    ret
@@ -623,37 +487,21 @@
 define <vscale x 1 x i16> @ceil_nxv1f32_to_si16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI18_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI18_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -664,37 +512,21 @@
 define <vscale x 1 x i16> @ceil_nxv1f32_to_ui16(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -751,35 +583,21 @@
 define <vscale x 1 x i64> @ceil_nxv1f32_to_si64(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI22_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI22_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -790,35 +608,21 @@
 define <vscale x 1 x i64> @ceil_nxv1f32_to_ui64(<vscale x 1 x float> %x) {
 ; RV32-LABEL: ceil_nxv1f32_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI23_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.xu.f.v v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f32_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI23_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.xu.f.v v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
@@ -921,37 +725,21 @@
 define <vscale x 4 x i16> @ceil_nxv4f32_to_si16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_si16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI26_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v10, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI26_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI26_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v10, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -962,37 +750,21 @@
 define <vscale x 4 x i16> @ceil_nxv4f32_to_ui16(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_ui16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI27_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v10, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI27_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI27_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10,
 v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v10, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -1049,35 +821,21 @@
 define <vscale x 4 x i64> @ceil_nxv4f32_to_si64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_si64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI30_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_si64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI30_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI30_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfwcvt.x.f.v v12, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v12, v8
 ; RV64-NEXT:    vmv4r.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
@@ -1088,35 +846,21 @@
 define <vscale x 4 x i64> @ceil_nxv4f32_to_ui64(<vscale x 4 x float> %x) {
 ; RV32-LABEL: ceil_nxv4f32_to_ui64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV32-NEXT:    flw ft0, %lo(.LCPI31_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfabs.v v10, v8
-; RV32-NEXT:    vmflt.vf v0, v10, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV32-NEXT:    vfwcvt.xu.f.v v12, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
 ; RV32-NEXT:    vmv4r.v v8, v12
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f32_to_ui64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI31_0)
-; RV64-NEXT:    flw ft0, %lo(.LCPI31_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfabs.v v10, v8
-; RV64-NEXT:    vmflt.vf v0, v10, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; RV64-NEXT:    vfwcvt.xu.f.v v12, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
 ; RV64-NEXT:    vmv4r.v v8, v12
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll
@@ -13,32 +13,14 @@
 define <vscale x 1 x i8> @trunc_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_si8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -50,32 +32,14 @@
 define <vscale x 1 x i8> @trunc_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_ui8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -121,30 +85,14 @@
 define <vscale x 1 x i32> @trunc_nxv1f16_to_si32(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -156,30 +104,14 @@
 define <vscale x 1 x i32> @trunc_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 ; RV32-LABEL: trunc_nxv1f16_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv1f16_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -271,32 +203,14 @@
 define <vscale x 4 x i8> @trunc_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_si8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI8_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI8_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI8_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI8_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -308,32 +222,14 @@
 define <vscale x 4 x i8> @trunc_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_ui8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI9_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI9_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
@@ -379,30 +275,14 @@
 define <vscale x 4 x i32> @trunc_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI12_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; RV32-NEXT:    vmv2r.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI12_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; RV64-NEXT:    vmv2r.v v8, v10
 ; RV64-NEXT:    ret
@@ -414,30 +294,14 @@ define <vscale x 4 x i32> @trunc_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; RV32-LABEL: trunc_nxv4f16_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI13_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
-; RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
 ; RV32-NEXT:    vmv2r.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: trunc_nxv4f16_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI13_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
-; RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
 ; RV64-NEXT:    vmv2r.v v8, v10
 ; RV64-NEXT:    ret
@@ -529,37 +393,21 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_si8(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_si8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI16_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI16_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -570,37 +418,21 @@ define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_ui8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI17_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI17_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI17_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -657,35 +489,21 @@ define <vscale x 1 x i32> @ceil_nxv1f16_to_si32(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI20_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI20_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.x.f.v v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -696,35 +514,21 @@ define <vscale x 1 x i32> @ceil_nxv1f16_to_ui32(<vscale x 1 x half> %x) {
 ; RV32-LABEL: ceil_nxv1f16_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI21_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.xu.f.v v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv1f16_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI21_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.xu.f.v v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
@@ -823,37 +627,21 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_si8(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_si8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI24_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI24_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_si8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI24_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI24_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.x.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -864,37 +652,21 @@ define <vscale x 4 x i8> @ceil_nxv4f16_to_ui8(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_ui8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI25_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI25_0)(a0)
-; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV32-NEXT:    vmv1r.v v8, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_ui8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI25_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI25_0)(a0)
-; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfncvt.xu.f.w v9, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; RV64-NEXT:    vmv1r.v v8, v9
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -951,35 +723,21 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_si32(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_si32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI28_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; RV32-NEXT:    vmv2r.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_si32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI28_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI28_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.x.f.v v10, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.x.f.v v10, v8
 ; RV64-NEXT:    vmv2r.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
@@ -990,35 +748,21 @@ define <vscale x 4 x i32> @ceil_nxv4f16_to_ui32(<vscale x 4 x half> %x) {
 ; RV32-LABEL: ceil_nxv4f16_to_ui32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV32-NEXT:    flh ft0, %lo(.LCPI29_0)(a0)
 ; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT:    vfabs.v v9, v8
-; RV32-NEXT:    vmflt.vf v0, v9, ft0
+; RV32-NEXT:    vmset.m v0
 ; RV32-NEXT:    fsrmi a0, 3
-; RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV32-NEXT:    vfwcvt.xu.f.v v10, v8, v0.t
 ; RV32-NEXT:    fsrm a0
-; RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV32-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
 ; RV32-NEXT:    vmv2r.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ceil_nxv4f16_to_ui32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI29_0)
-; RV64-NEXT:    flh ft0, %lo(.LCPI29_0)(a0)
 ; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT:    vfabs.v v9, v8
-; RV64-NEXT:    vmflt.vf v0, v9, ft0
+; RV64-NEXT:    vmset.m v0
 ; RV64-NEXT:    fsrmi a0, 3
-; RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; RV64-NEXT:    vfwcvt.xu.f.v v10, v8, v0.t
 ; RV64-NEXT:    fsrm a0
-; RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
 ; RV64-NEXT:    vmv2r.v v8, v10
 ; RV64-NEXT:    ret
   %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
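
For reference, the IR exercised by these tests has the following shape (a minimal sketch: only the intrinsic call line is visible in the hunk context above, so the fptoui and ret lines here are assumed, inferred from the function names and the checked conversions):

; Sketch of one test: a ceil followed by a narrowing fptoui. With this patch
; the pair is combined into a single vfncvt.xu.f.w, with fsrmi/fsrm setting
; the dynamic rounding mode to 3 (RUP, round-up, matching ceil).
define <vscale x 1 x i8> @ceil_nxv1f16_to_ui8(<vscale x 1 x half> %x) {
  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
  %b = fptoui <vscale x 1 x half> %a to <vscale x 1 x i8>
  ret <vscale x 1 x i8> %b
}
declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)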