diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3584,6 +3584,12 @@
     Known = KnownBits::smin(Known, Known2);
     break;
   }
+  case ISD::FP_TO_UINT_SAT: {
+    // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
+    break;
+  }
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
     if (Op.getResNo() == 1) {
       // The boolean result conforms to getBooleanContents.
@@ -3860,6 +3866,10 @@
     break;
   }
 
+  case ISD::FP_TO_SINT_SAT:
+    // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT.
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
+    return VTBits - Tmp + 1;
   case ISD::SIGN_EXTEND:
     Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
     return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -631,7 +631,6 @@
 ; CHECK-NEXT:    movi v1.2d, #0xffffff00ffffff
 ; CHECK-NEXT:    fcvtzu v0.4s, v0.4s, #3
 ; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bic v0.4s, #255, lsl #24
 ; CHECK-NEXT:    ret
   %mul.i = fmul <4 x float> %f,
   %vcvt.i = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %mul.i)
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -22,8 +22,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu w8, s0
 ; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    csinc w0, w8, wzr, lo
 ; CHECK-NEXT:    ret
   %x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
   ret i1 %x
@@ -172,8 +171,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzu w8, d0
 ; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    csinc w0, w8, wzr, lo
 ; CHECK-NEXT:    ret
   %x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
   ret i1 %x
@@ -323,16 +321,14 @@
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvtzu w8, s0
 ; CHECK-CVT-NEXT:    cmp w8, #1
-; CHECK-CVT-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-CVT-NEXT:    and w0, w8, #0x1
+; CHECK-CVT-NEXT:    csinc w0, w8, wzr, lo
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_i1_f16:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    cmp w8, #1
-; CHECK-FP16-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-FP16-NEXT:    and w0, w8, #0x1
+; CHECK-FP16-NEXT:    csinc w0, w8, wzr, lo
 ; CHECK-FP16-NEXT:    ret
   %x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
   ret i1 %x
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -1602,7 +1602,8 @@
 ; RV32IFD-NEXT:    bnez a0, .LBB26_2
 ; RV32IFD-NEXT:  # %bb.1: # %start
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    j .LBB26_3
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB26_2:
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI26_0)
 ; RV32IFD-NEXT:    fld ft1, %lo(.LCPI26_0)(a0)
@@ -1611,9 +1612,6 @@
 ; RV32IFD-NEXT:    fmax.d ft0, ft0, ft1
 ; RV32IFD-NEXT:    fmin.d ft0, ft0, ft2
 ; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT:  .LBB26_3: # %start
-; RV32IFD-NEXT:    slli a0, a0, 16
-; RV32IFD-NEXT:    srai a0, a0, 16
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -1624,7 +1622,7 @@
 ; RV64IFD-NEXT:    bnez a0, .LBB26_2
 ; RV64IFD-NEXT:  # %bb.1: # %start
 ; RV64IFD-NEXT:    li a0, 0
-; RV64IFD-NEXT:    j .LBB26_3
+; RV64IFD-NEXT:    ret
 ; RV64IFD-NEXT:  .LBB26_2:
 ; RV64IFD-NEXT:    lui a0, %hi(.LCPI26_0)
 ; RV64IFD-NEXT:    fld ft1, %lo(.LCPI26_0)(a0)
@@ -1633,9 +1631,6 @@
 ; RV64IFD-NEXT:    fmax.d ft0, ft0, ft1
 ; RV64IFD-NEXT:    fmin.d ft0, ft0, ft2
 ; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT:  .LBB26_3: # %start
-; RV64IFD-NEXT:    slli a0, a0, 48
-; RV64IFD-NEXT:    srai a0, a0, 48
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1798,9 +1793,6 @@
 ; RV32IFD-NEXT:    fmax.d ft0, ft0, ft2
 ; RV32IFD-NEXT:    fmin.d ft0, ft0, ft1
 ; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
-; RV32IFD-NEXT:    lui a1, 16
-; RV32IFD-NEXT:    addi a1, a1, -1
-; RV32IFD-NEXT:    and a0, a0, a1
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -1813,9 +1805,6 @@
 ; RV64IFD-NEXT:    fmax.d ft1, ft1, ft2
 ; RV64IFD-NEXT:    fmin.d ft0, ft1, ft0
 ; RV64IFD-NEXT:    fcvt.lu.d a0, ft0, rtz
-; RV64IFD-NEXT:    lui a1, 16
-; RV64IFD-NEXT:    addiw a1, a1, -1
-; RV64IFD-NEXT:    and a0, a0, a1
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -1956,7 +1945,8 @@
 ; RV32IFD-NEXT:    bnez a0, .LBB30_2
 ; RV32IFD-NEXT:  # %bb.1: # %start
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    j .LBB30_3
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB30_2:
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI30_0)
 ; RV32IFD-NEXT:    fld ft1, %lo(.LCPI30_0)(a0)
@@ -1965,9 +1955,6 @@
 ; RV32IFD-NEXT:    fmax.d ft0, ft0, ft1
 ; RV32IFD-NEXT:    fmin.d ft0, ft0, ft2
 ; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
-; RV32IFD-NEXT:  .LBB30_3: # %start
-; RV32IFD-NEXT:    slli a0, a0, 24
-; RV32IFD-NEXT:    srai a0, a0, 24
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -1978,7 +1965,7 @@
 ; RV64IFD-NEXT:    bnez a0, .LBB30_2
 ; RV64IFD-NEXT:  # %bb.1: # %start
 ; RV64IFD-NEXT:    li a0, 0
-; RV64IFD-NEXT:    j .LBB30_3
+; RV64IFD-NEXT:    ret
 ; RV64IFD-NEXT:  .LBB30_2:
 ; RV64IFD-NEXT:    lui a0, %hi(.LCPI30_0)
 ; RV64IFD-NEXT:    fld ft1, %lo(.LCPI30_0)(a0)
@@ -1987,9 +1974,6 @@
 ; RV64IFD-NEXT:    fmax.d ft0, ft0, ft1
 ; RV64IFD-NEXT:    fmin.d ft0, ft0, ft2
 ; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
-; RV64IFD-NEXT:  .LBB30_3: # %start
-; RV64IFD-NEXT:    slli a0, a0, 56
-; RV64IFD-NEXT:    srai a0, a0, 56
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -2154,7 +2138,6 @@
 ; RV32IFD-NEXT:    fmax.d ft0, ft0, ft2
 ; RV32IFD-NEXT:    fmin.d ft0, ft0, ft1
 ; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
-; RV32IFD-NEXT:    andi a0, a0, 255
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -2167,7 +2150,6 @@
 ; RV64IFD-NEXT:    fmax.d ft1, ft1, ft2
 ; RV64IFD-NEXT:    fmin.d ft0, ft1, ft0
 ; RV64IFD-NEXT:    fcvt.lu.d a0, ft0, rtz
-; RV64IFD-NEXT:    andi a0, a0, 255
 ; RV64IFD-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -1385,7 +1385,7 @@
 ; RV32IF-NEXT:    bnez a0, .LBB24_2
 ; RV32IF-NEXT:  # %bb.1: # %start
 ; RV32IF-NEXT:    li a0, 0
-; RV32IF-NEXT:    j .LBB24_3
+; RV32IF-NEXT:    ret
 ; RV32IF-NEXT:  .LBB24_2:
 ; RV32IF-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV32IF-NEXT:    flw ft1, %lo(.LCPI24_0)(a0)
@@ -1394,9 +1394,6 @@
 ; RV32IF-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IF-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT:  .LBB24_3: # %start
-; RV32IF-NEXT:    slli a0, a0, 16
-; RV32IF-NEXT:    srai a0, a0, 16
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_w_s_sat_i16:
@@ -1406,7 +1403,7 @@
 ; RV64IF-NEXT:    bnez a0, .LBB24_2
 ; RV64IF-NEXT:  # %bb.1: # %start
 ; RV64IF-NEXT:    li a0, 0
-; RV64IF-NEXT:    j .LBB24_3
+; RV64IF-NEXT:    ret
 ; RV64IF-NEXT:  .LBB24_2:
 ; RV64IF-NEXT:    lui a0, %hi(.LCPI24_0)
 ; RV64IF-NEXT:    flw ft1, %lo(.LCPI24_0)(a0)
@@ -1415,9 +1412,6 @@
 ; RV64IF-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IF-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT:  .LBB24_3: # %start
-; RV64IF-NEXT:    slli a0, a0, 48
-; RV64IF-NEXT:    srai a0, a0, 48
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -1561,9 +1555,6 @@
 ; RV32IF-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IF-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IF-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IF-NEXT:    lui a1, 16
-; RV32IF-NEXT:    addi a1, a1, -1
-; RV32IF-NEXT:    and a0, a0, a1
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_wu_s_sat_i16:
@@ -1575,9 +1566,6 @@
 ; RV64IF-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IF-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IF-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IF-NEXT:    lui a1, 16
-; RV64IF-NEXT:    addiw a1, a1, -1
-; RV64IF-NEXT:    and a0, a0, a1
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -1701,7 +1689,7 @@
 ; RV32IF-NEXT:    bnez a0, .LBB28_2
 ; RV32IF-NEXT:  # %bb.1: # %start
 ; RV32IF-NEXT:    li a0, 0
-; RV32IF-NEXT:    j .LBB28_3
+; RV32IF-NEXT:    ret
 ; RV32IF-NEXT:  .LBB28_2:
 ; RV32IF-NEXT:    lui a0, %hi(.LCPI28_0)
 ; RV32IF-NEXT:    flw ft1, %lo(.LCPI28_0)(a0)
@@ -1710,9 +1698,6 @@
 ; RV32IF-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IF-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IF-NEXT:  .LBB28_3: # %start
-; RV32IF-NEXT:    slli a0, a0, 24
-; RV32IF-NEXT:    srai a0, a0, 24
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_w_s_sat_i8:
@@ -1722,7 +1707,7 @@
 ; RV64IF-NEXT:    bnez a0, .LBB28_2
 ; RV64IF-NEXT:  # %bb.1: # %start
 ; RV64IF-NEXT:    li a0, 0
-; RV64IF-NEXT:    j .LBB28_3
+; RV64IF-NEXT:    ret
 ; RV64IF-NEXT:  .LBB28_2:
 ; RV64IF-NEXT:    lui a0, %hi(.LCPI28_0)
 ; RV64IF-NEXT:    flw ft1, %lo(.LCPI28_0)(a0)
@@ -1731,9 +1716,6 @@
 ; RV64IF-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IF-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IF-NEXT:  .LBB28_3: # %start
-; RV64IF-NEXT:    slli a0, a0, 56
-; RV64IF-NEXT:    srai a0, a0, 56
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -1875,7 +1857,6 @@
 ; RV32IF-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IF-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IF-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IF-NEXT:    andi a0, a0, 255
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fcvt_wu_s_sat_i8:
@@ -1887,7 +1868,6 @@
 ; RV64IF-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IF-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IF-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IF-NEXT:    andi a0, a0, 255
 ; RV64IF-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2464,7 +2464,7 @@
 ; RV32IZFH-NEXT:    bnez a0, .LBB32_2
 ; RV32IZFH-NEXT:  # %bb.1: # %start
 ; RV32IZFH-NEXT:    li a0, 0
-; RV32IZFH-NEXT:    j .LBB32_3
+; RV32IZFH-NEXT:    ret
 ; RV32IZFH-NEXT:  .LBB32_2:
 ; RV32IZFH-NEXT:    lui a0, %hi(.LCPI32_0)
 ; RV32IZFH-NEXT:    flw ft1, %lo(.LCPI32_0)(a0)
@@ -2473,9 +2473,6 @@
 ; RV32IZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IZFH-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT:  .LBB32_3: # %start
-; RV32IZFH-NEXT:    slli a0, a0, 16
-; RV32IZFH-NEXT:    srai a0, a0, 16
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2485,7 +2482,7 @@
 ; RV64IZFH-NEXT:    bnez a0, .LBB32_2
 ; RV64IZFH-NEXT:  # %bb.1: # %start
 ; RV64IZFH-NEXT:    li a0, 0
-; RV64IZFH-NEXT:    j .LBB32_3
+; RV64IZFH-NEXT:    ret
 ; RV64IZFH-NEXT:  .LBB32_2:
 ; RV64IZFH-NEXT:    lui a0, %hi(.LCPI32_0)
 ; RV64IZFH-NEXT:    flw ft1, %lo(.LCPI32_0)(a0)
@@ -2494,9 +2491,6 @@
 ; RV64IZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IZFH-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT:  .LBB32_3: # %start
-; RV64IZFH-NEXT:    slli a0, a0, 48
-; RV64IZFH-NEXT:    srai a0, a0, 48
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2506,7 +2500,7 @@
 ; RV32IDZFH-NEXT:    bnez a0, .LBB32_2
 ; RV32IDZFH-NEXT:  # %bb.1: # %start
 ; RV32IDZFH-NEXT:    li a0, 0
-; RV32IDZFH-NEXT:    j .LBB32_3
+; RV32IDZFH-NEXT:    ret
 ; RV32IDZFH-NEXT:  .LBB32_2:
 ; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI32_0)
 ; RV32IDZFH-NEXT:    flw ft1, %lo(.LCPI32_0)(a0)
@@ -2515,9 +2509,6 @@
 ; RV32IDZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IDZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IDZFH-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT:  .LBB32_3: # %start
-; RV32IDZFH-NEXT:    slli a0, a0, 16
-; RV32IDZFH-NEXT:    srai a0, a0, 16
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_w_s_sat_i16:
@@ -2527,7 +2518,7 @@
 ; RV64IDZFH-NEXT:    bnez a0, .LBB32_2
 ; RV64IDZFH-NEXT:  # %bb.1: # %start
 ; RV64IDZFH-NEXT:    li a0, 0
-; RV64IDZFH-NEXT:    j .LBB32_3
+; RV64IDZFH-NEXT:    ret
 ; RV64IDZFH-NEXT:  .LBB32_2:
 ; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI32_0)
 ; RV64IDZFH-NEXT:    flw ft1, %lo(.LCPI32_0)(a0)
@@ -2536,9 +2527,6 @@
 ; RV64IDZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IDZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IDZFH-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT:  .LBB32_3: # %start
-; RV64IDZFH-NEXT:    slli a0, a0, 48
-; RV64IDZFH-NEXT:    srai a0, a0, 48
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -2705,9 +2693,6 @@
 ; RV32IZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IZFH-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IZFH-NEXT:    lui a1, 16
-; RV32IZFH-NEXT:    addi a1, a1, -1
-; RV32IZFH-NEXT:    and a0, a0, a1
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2719,9 +2704,6 @@
 ; RV64IZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IZFH-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IZFH-NEXT:    lui a1, 16
-; RV64IZFH-NEXT:    addiw a1, a1, -1
-; RV64IZFH-NEXT:    and a0, a0, a1
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2733,9 +2715,6 @@
 ; RV32IDZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IDZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IDZFH-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IDZFH-NEXT:    lui a1, 16
-; RV32IDZFH-NEXT:    addi a1, a1, -1
-; RV32IDZFH-NEXT:    and a0, a0, a1
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_wu_s_sat_i16:
@@ -2747,9 +2726,6 @@
 ; RV64IDZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IDZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IDZFH-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IDZFH-NEXT:    lui a1, 16
-; RV64IDZFH-NEXT:    addiw a1, a1, -1
-; RV64IDZFH-NEXT:    and a0, a0, a1
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i16:
@@ -2896,7 +2872,7 @@
 ; RV32IZFH-NEXT:    bnez a0, .LBB36_2
 ; RV32IZFH-NEXT:  # %bb.1: # %start
 ; RV32IZFH-NEXT:    li a0, 0
-; RV32IZFH-NEXT:    j .LBB36_3
+; RV32IZFH-NEXT:    ret
 ; RV32IZFH-NEXT:  .LBB36_2:
 ; RV32IZFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32IZFH-NEXT:    flw ft1, %lo(.LCPI36_0)(a0)
@@ -2905,9 +2881,6 @@
 ; RV32IZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IZFH-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IZFH-NEXT:  .LBB36_3: # %start
-; RV32IZFH-NEXT:    slli a0, a0, 24
-; RV32IZFH-NEXT:    srai a0, a0, 24
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2917,7 +2890,7 @@
 ; RV64IZFH-NEXT:    bnez a0, .LBB36_2
 ; RV64IZFH-NEXT:  # %bb.1: # %start
 ; RV64IZFH-NEXT:    li a0, 0
-; RV64IZFH-NEXT:    j .LBB36_3
+; RV64IZFH-NEXT:    ret
 ; RV64IZFH-NEXT:  .LBB36_2:
 ; RV64IZFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV64IZFH-NEXT:    flw ft1, %lo(.LCPI36_0)(a0)
@@ -2926,9 +2899,6 @@
 ; RV64IZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IZFH-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IZFH-NEXT:  .LBB36_3: # %start
-; RV64IZFH-NEXT:    slli a0, a0, 56
-; RV64IZFH-NEXT:    srai a0, a0, 56
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2938,7 +2908,7 @@
 ; RV32IDZFH-NEXT:    bnez a0, .LBB36_2
 ; RV32IDZFH-NEXT:  # %bb.1: # %start
 ; RV32IDZFH-NEXT:    li a0, 0
-; RV32IDZFH-NEXT:    j .LBB36_3
+; RV32IDZFH-NEXT:    ret
 ; RV32IDZFH-NEXT:  .LBB36_2:
 ; RV32IDZFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV32IDZFH-NEXT:    flw ft1, %lo(.LCPI36_0)(a0)
@@ -2947,9 +2917,6 @@
 ; RV32IDZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV32IDZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV32IDZFH-NEXT:    fcvt.w.s a0, ft0, rtz
-; RV32IDZFH-NEXT:  .LBB36_3: # %start
-; RV32IDZFH-NEXT:    slli a0, a0, 24
-; RV32IDZFH-NEXT:    srai a0, a0, 24
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_w_s_sat_i8:
@@ -2959,7 +2926,7 @@
 ; RV64IDZFH-NEXT:    bnez a0, .LBB36_2
 ; RV64IDZFH-NEXT:  # %bb.1: # %start
 ; RV64IDZFH-NEXT:    li a0, 0
-; RV64IDZFH-NEXT:    j .LBB36_3
+; RV64IDZFH-NEXT:    ret
 ; RV64IDZFH-NEXT:  .LBB36_2:
 ; RV64IDZFH-NEXT:    lui a0, %hi(.LCPI36_0)
 ; RV64IDZFH-NEXT:    flw ft1, %lo(.LCPI36_0)(a0)
@@ -2968,9 +2935,6 @@
 ; RV64IDZFH-NEXT:    fmax.s ft0, ft0, ft1
 ; RV64IDZFH-NEXT:    fmin.s ft0, ft0, ft2
 ; RV64IDZFH-NEXT:    fcvt.l.s a0, ft0, rtz
-; RV64IDZFH-NEXT:  .LBB36_3: # %start
-; RV64IDZFH-NEXT:    slli a0, a0, 56
-; RV64IDZFH-NEXT:    srai a0, a0, 56
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_w_s_sat_i8:
@@ -3136,7 +3100,6 @@
 ; RV32IZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IZFH-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IZFH-NEXT:    andi a0, a0, 255
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3148,7 +3111,6 @@
 ; RV64IZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IZFH-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IZFH-NEXT:    andi a0, a0, 255
 ; RV64IZFH-NEXT:    ret
 ;
 ; RV32IDZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3160,7 +3122,6 @@
 ; RV32IDZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV32IDZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV32IDZFH-NEXT:    fcvt.wu.s a0, ft0, rtz
-; RV32IDZFH-NEXT:    andi a0, a0, 255
 ; RV32IDZFH-NEXT:    ret
 ;
 ; RV64IDZFH-LABEL: fcvt_wu_s_sat_i8:
@@ -3172,7 +3133,6 @@
 ; RV64IDZFH-NEXT:    fmax.s ft1, ft1, ft2
 ; RV64IDZFH-NEXT:    fmin.s ft0, ft1, ft0
 ; RV64IDZFH-NEXT:    fcvt.lu.s a0, ft0, rtz
-; RV64IDZFH-NEXT:    andi a0, a0, 255
 ; RV64IDZFH-NEXT:    ret
 ;
 ; RV32I-LABEL: fcvt_wu_s_sat_i8:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -3796,82 +3796,73 @@
 define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v8f16_v8i19:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    vldr s4, .LCPI46_0
-; CHECK-NEXT:    vcvtb.f32.f16 s8, s1
-; CHECK-NEXT:    vcvtt.f32.f16 s12, s1
-; CHECK-NEXT:    vcvtt.f32.f16 s1, s3
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    vldr s6, .LCPI46_1
-; CHECK-NEXT:    vmaxnm.f32 s1, s1, s4
-; CHECK-NEXT:    vcvtb.f32.f16 s10, s2
-; CHECK-NEXT:    vcvtb.f32.f16 s14, s0
-; CHECK-NEXT:    vminnm.f32 s1, s1, s6
+; CHECK-NEXT:    vcvtb.f32.f16 s8, s0
+; CHECK-NEXT:    vcvtb.f32.f16 s12, s2
+; CHECK-NEXT:    vcvtb.f32.f16 s10, s1
+; CHECK-NEXT:    vcvtt.f32.f16 s14, s1
+; CHECK-NEXT:    vcvtb.f32.f16 s1, s3
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s0
-; CHECK-NEXT:    vcvt.u32.f32 s1, s1
 ; CHECK-NEXT:    vcvtt.f32.f16 s2, s2
-; CHECK-NEXT:    vcvtb.f32.f16 s3, s3
-; CHECK-NEXT:    vmaxnm.f32 s8, s8, s4
-; CHECK-NEXT:    vmaxnm.f32 s10, s10, s4
-; CHECK-NEXT:    vmaxnm.f32 s12, s12, s4
-; CHECK-NEXT:    vmaxnm.f32 s14, s14, s4
-; CHECK-NEXT:    vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT:    vmaxnm.f32 s2, s2, s4
-; CHECK-NEXT:    vmaxnm.f32 s4, s3, s4
-; CHECK-NEXT:    vminnm.f32 s4, s4, s6
-; CHECK-NEXT:    vminnm.f32 s2, s2, s6
+; CHECK-NEXT:    vldr s4, .LCPI46_0
+; CHECK-NEXT:    vcvtt.f32.f16 s3, s3
+; CHECK-NEXT:    vmaxnm.f32 s8, s8, s6
+; CHECK-NEXT:    vmaxnm.f32 s10, s10, s6
+; CHECK-NEXT:    vmaxnm.f32 s0, s0, s6
+; CHECK-NEXT:    vmaxnm.f32 s12, s12, s6
+; CHECK-NEXT:    vmaxnm.f32 s14, s14, s6
+; CHECK-NEXT:    vmaxnm.f32 s2, s2, s6
+; CHECK-NEXT:    vmaxnm.f32 s1, s1, s6
+; CHECK-NEXT:    vmaxnm.f32 s6, s3, s6
+; CHECK-NEXT:    vminnm.f32 s8, s8, s4
+; CHECK-NEXT:    vminnm.f32 s10, s10, s4
+; CHECK-NEXT:    vminnm.f32 s0, s0, s4
+; CHECK-NEXT:    vminnm.f32 s12, s12, s4
+; CHECK-NEXT:    vminnm.f32 s14, s14, s4
+; CHECK-NEXT:    vminnm.f32 s2, s2, s4
+; CHECK-NEXT:    vminnm.f32 s1, s1, s4
+; CHECK-NEXT:    vminnm.f32 s4, s6, s4
+; CHECK-NEXT:    vcvt.u32.f32 s1, s1
 ; CHECK-NEXT:    vcvt.u32.f32 s4, s4
-; CHECK-NEXT:    vminnm.f32 s0, s0, s6
-; CHECK-NEXT:    vmov r1, s1
-; CHECK-NEXT:    vminnm.f32 s14, s14, s6
 ; CHECK-NEXT:    vcvt.u32.f32 s2, s2
-; CHECK-NEXT:    vminnm.f32 s10, s10, s6
-; CHECK-NEXT:    vcvt.u32.f32 s0, s0
-; CHECK-NEXT:    vminnm.f32 s12, s12, s6
 ; CHECK-NEXT:    vcvt.u32.f32 s14, s14
-; CHECK-NEXT:    vminnm.f32 s8, s8, s6
-; CHECK-NEXT:    vcvt.u32.f32 s10, s10
 ; CHECK-NEXT:    vcvt.u32.f32 s12, s12
+; CHECK-NEXT:    vcvt.u32.f32 s0, s0
+; CHECK-NEXT:    vcvt.u32.f32 s10, s10
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r3, s4
 ; CHECK-NEXT:    vcvt.u32.f32 s8, s8
-; CHECK-NEXT:    vmov r12, s2
-; CHECK-NEXT:    vmov lr, s0
-; CHECK-NEXT:    lsrs r2, r1, #11
-; CHECK-NEXT:    strb r2, [r0, #18]
-; CHECK-NEXT:    vmov r2, s4
-; CHECK-NEXT:    bfc r12, #19, #13
-; CHECK-NEXT:    bfc lr, #19, #13
-; CHECK-NEXT:    bfc r2, #19, #13
-; CHECK-NEXT:    lsrs r3, r2, #14
-; CHECK-NEXT:    orr.w r1, r3, r1, lsl #5
-; CHECK-NEXT:    lsr.w r3, r12, #1
-; CHECK-NEXT:    orr.w r2, r3, r2, lsl #18
-; CHECK-NEXT:    vmov r3, s14
-; CHECK-NEXT:    strh r1, [r0, #16]
-; CHECK-NEXT:    vmov r1, s10
-; CHECK-NEXT:    str r2, [r0, #12]
-; CHECK-NEXT:    bfc r3, #19, #13
-; CHECK-NEXT:    orr.w r3, r3, lr, lsl #19
-; CHECK-NEXT:    str r3, [r0]
-; CHECK-NEXT:    vmov r3, s12
-; CHECK-NEXT:    bfc r1, #19, #13
-; CHECK-NEXT:    bfc r3, #19, #13
-; CHECK-NEXT:    lsrs r2, r3, #7
-; CHECK-NEXT:    orr.w r1, r2, r1, lsl #12
-; CHECK-NEXT:    orr.w r1, r1, r12, lsl #31
-; CHECK-NEXT:    str r1, [r0, #8]
-; CHECK-NEXT:    vmov r1, s8
-; CHECK-NEXT:    lsr.w r2, lr, #13
-; CHECK-NEXT:    bfc r1, #19, #13
-; CHECK-NEXT:    orr.w r1, r2, r1, lsl #6
-; CHECK-NEXT:    orr.w r1, r1, r3, lsl #25
-; CHECK-NEXT:    str r1, [r0, #4]
-; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov r5, s10
+; CHECK-NEXT:    lsrs r2, r1, #14
+; CHECK-NEXT:    orr.w r12, r2, r3, lsl #5
+; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    strh.w r12, [r0, #16]
+; CHECK-NEXT:    lsrs r2, r3, #1
+; CHECK-NEXT:    orr.w lr, r2, r1, lsl #18
+; CHECK-NEXT:    vmov r2, s14
+; CHECK-NEXT:    lsrs r1, r2, #7
+; CHECK-NEXT:    orr.w r1, r1, r4, lsl #12
+; CHECK-NEXT:    orr.w r1, r1, r3, lsl #31
+; CHECK-NEXT:    vmov r3, s0
+; CHECK-NEXT:    lsrs r4, r3, #13
+; CHECK-NEXT:    orr.w r4, r4, r5, lsl #6
+; CHECK-NEXT:    orr.w r2, r4, r2, lsl #25
+; CHECK-NEXT:    vmov r4, s8
+; CHECK-NEXT:    orr.w r3, r4, r3, lsl #19
+; CHECK-NEXT:    strd r3, r2, [r0]
+; CHECK-NEXT:    strd r1, lr, [r0, #8]
+; CHECK-NEXT:    lsr.w r1, r12, #16
+; CHECK-NEXT:    strb r1, [r0, #18]
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI46_0:
-; CHECK-NEXT:    .long 0x00000000 @ float 0
-; CHECK-NEXT:  .LCPI46_1:
 ; CHECK-NEXT:    .long 0x48ffffe0 @ float 524287
+; CHECK-NEXT:  .LCPI46_1:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
   %x = call <8 x i19> @llvm.fptoui.sat.v8i19.v8f16(<8 x half> %f)
   ret <8 x i19> %x
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
@@ -1114,7 +1114,6 @@
 ; CHECK-NEXT:    vmov.i32 q1, #0xffffff
 ; CHECK-NEXT:    vcvt.u32.f32 q0, q0, #23
 ; CHECK-NEXT:    vmin.u32 q0, q0, q1
-; CHECK-NEXT:    vbic.i32 q0, #0xff000000
 ; CHECK-NEXT:    bx lr
   %2 = fmul fast <4 x float> %0,
   %3 = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %2)
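
For reference, the arithmetic behind the two new cases: FP_TO_UINT_SAT clamps its result to [0, 2^N - 1] for an N-bit saturation type, so every bit at position N and above is known zero, and FP_TO_SINT_SAT clamps to [-2^(N-1), 2^(N-1) - 1], so the result repeats its sign bit in at least VTBits - N + 1 leading positions. Those two facts are what let the generic DAG combines delete the and/bic/vbic masks and the slli/srai sign-extension pairs in the regenerated CHECK lines above. Below is a minimal standalone sketch of the rule in plain C++, with uint64_t standing in for APInt/KnownBits; the function names are illustrative, not the LLVM API.

#include <cassert>
#include <cstdint>

// Known-zero mask for FP_TO_UINT_SAT: a BitWidth-bit result saturated to
// SatBits bits has bits [SatBits, BitWidth) always clear. This mirrors
// Known.Zero |= APInt::getBitsSetFrom(BitWidth, SatBits) from the patch.
uint64_t fpToUIntSatKnownZero(unsigned BitWidth, unsigned SatBits) {
  assert(SatBits <= BitWidth && BitWidth <= 64);
  uint64_t WidthMask = BitWidth == 64 ? ~0ULL : (1ULL << BitWidth) - 1;
  uint64_t SatMask = SatBits == 64 ? ~0ULL : (1ULL << SatBits) - 1;
  return WidthMask & ~SatMask;
}

// Sign-bit count for FP_TO_SINT_SAT: a value saturated to SatBits signed
// bits, held in a VTBits-wide register, has at least VTBits - SatBits + 1
// copies of the sign bit. This mirrors the new ComputeNumSignBits case.
unsigned fpToSIntSatNumSignBits(unsigned VTBits, unsigned SatBits) {
  assert(SatBits >= 1 && SatBits <= VTBits);
  return VTBits - SatBits + 1;
}

int main() {
  // fptoui.sat.i16 computed in a 32-bit register: bits 16..31 are known
  // zero, so the RISC-V lui/addi/and 0xffff sequence above is redundant.
  assert(fpToUIntSatKnownZero(32, 16) == 0xffff0000ULL);
  // fptoui.sat.i1: only bit 0 can be set, so the AArch64
  // "and w0, w8, #0x1" after the csinc is redundant.
  assert(fpToUIntSatKnownZero(32, 1) == 0xfffffffeULL);
  // fptosi.sat.i8 in a 32-bit register: 32 - 8 + 1 = 25 sign bits, which is
  // enough for the sext_inreg (slli/srai by 24) pairs to fold away.
  assert(fpToSIntSatNumSignBits(32, 8) == 25);
  return 0;
}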