diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4786,12 +4786,14 @@ } // Function to calculate whether the Min/Max pair of SDNodes (potentially -// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1) -// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp -// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The -// operands are the same as SimplifySelectCC. N0getAPIntValue(); const APInt &MaxC = MaxCOp->getAPIntValue(); APInt MinCPlus1 = MinC + 1; - if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2()) - return SDValue(); - BW = MinCPlus1.exactLogBase2() + 1; - return N02; + if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2() + 1; + Unsigned = false; + return N02; + } + + if (MaxC == 0 && MinCPlus1.isPowerOf2()) { + BW = MinCPlus1.exactLogBase2(); + Unsigned = true; + return N02; + } + + return SDValue(); } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { unsigned BW; - SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW); + bool Unsigned; + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned); if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPVT = Fp.getOperand(0).getValueType(); @@ -4876,13 +4888,14 @@ if (FPVT.isVector()) NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, FPVT.getVectorElementCount()); - if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat( - ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT)) + unsigned NewOpc = Unsigned ? 
ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT; + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT)) return SDValue(); SDLoc DL(Fp); - SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0), + SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0), DAG.getValueType(NewVT.getScalarType())); - return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); + return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0)) + : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1100,6 +1100,12 @@ bool shouldSplatInsEltVarIndex(EVT VT) const override; + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { + // Converting to sat variants holds little benefit on X86 as we will just + // need to saturate the value back using fp arithmetic. 
+ return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -38,12 +38,7 @@ define i32 @ustest_f64i32(double %x) { ; CHECK-LABEL: ustest_f64i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w8, wzr, gt +; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret entry: %conv = fptosi double %x to i64 @@ -89,12 +84,7 @@ define i32 @ustest_f32i32(float %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w8, wzr, gt +; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret entry: %conv = fptosi float %x to i64 @@ -156,22 +146,12 @@ ; CHECK-CVT-LABEL: ustest_f16i32: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov w9, #-1 -; CHECK-CVT-NEXT: fcvtzs x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x8, x8, x9, lt -; CHECK-CVT-NEXT: cmp x8, #0 -; CHECK-CVT-NEXT: csel w0, w8, wzr, gt +; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i32: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov w9, #-1 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x8, x8, x9, lt -; CHECK-FP16-NEXT: cmp x8, #0 -; CHECK-FP16-NEXT: csel w0, w8, wzr, gt +; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i64 @@ -617,13 +597,7 @@ define i32 @ustest_f64i32_mm(double %x) { ; CHECK-LABEL: ustest_f64i32_mm: ; CHECK: // %bb.0: // 
%entry -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel x0, x8, xzr, gt -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret entry: %conv = fptosi double %x to i64 @@ -665,13 +639,7 @@ define i32 @ustest_f32i32_mm(float %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel x0, x8, xzr, gt -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret entry: %conv = fptosi float %x to i64 @@ -730,24 +698,12 @@ ; CHECK-CVT-LABEL: ustest_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov w9, #-1 -; CHECK-CVT-NEXT: fcvtzs x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x8, x8, x9, lt -; CHECK-CVT-NEXT: cmp x8, #0 -; CHECK-CVT-NEXT: csel x0, x8, xzr, gt -; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov w9, #-1 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x8, x8, x9, lt -; CHECK-FP16-NEXT: cmp x8, #0 -; CHECK-FP16-NEXT: csel x0, x8, xzr, gt -; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i64 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -45,13 +45,12 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: 
movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: cmgt v1.2d, v0.2d, #0 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i64> @@ -106,21 +105,7 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b -; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b -; CHECK-NEXT: cmgt v1.2d, v2.2d, #0 -; CHECK-NEXT: cmgt v3.2d, v0.2d, #0 -; CHECK-NEXT: and v1.16b, v2.16b, v1.16b -; CHECK-NEXT: and v2.16b, v0.16b, v3.16b -; CHECK-NEXT: xtn v0.2s, v1.2d -; CHECK-NEXT: xtn2 v0.4s, v2.2d +; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -211,63 +196,11 @@ } define <4 x i32> @ustest_f16i32(<4 x half> %x) { -; CHECK-CVT-LABEL: ustest_f16i32: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzs x9, 
s0 -; CHECK-CVT-NEXT: mov v2.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x9 -; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b -; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b -; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-CVT-NEXT: xtn v0.2s, v0.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: ustest_f16i32: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzs x9, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov v2.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b -; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b -; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-NEXT: xtn v0.2s, v0.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: ustest_f16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -369,12 +302,8 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: 
smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: uqxtn v0.4h, v0.4s ; CHECK-NEXT: ret entry: %conv = fptosi <4 x float> %x to <4 x i32> @@ -436,21 +365,22 @@ } define <8 x i16> @ustest_f16i16(<8 x half> %x) { -; CHECK-LABEL: ustest_f16i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s -; CHECK-NEXT: xtn v0.4h, v1.4s -; CHECK-NEXT: xtn2 v0.8h, v2.4s -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: ustest_f16i16: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: ustest_f16i16: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> %0 = icmp slt <8 x i32> %conv, @@ -852,13 +782,12 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: cmgt v1.2d, v0.2d, #0 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w8, d0 +; 
CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i64> @@ -908,21 +837,7 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b -; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b -; CHECK-NEXT: cmgt v1.2d, v2.2d, #0 -; CHECK-NEXT: cmgt v3.2d, v0.2d, #0 -; CHECK-NEXT: and v1.16b, v2.16b, v1.16b -; CHECK-NEXT: and v2.16b, v0.16b, v3.16b -; CHECK-NEXT: xtn v0.2s, v1.2d -; CHECK-NEXT: xtn2 v0.4s, v2.2d +; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -1008,63 +923,11 @@ } define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-LABEL: ustest_f16i32_mm: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s0 -; CHECK-CVT-NEXT: mov v2.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x9 -; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b -; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b -; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0 -; 
CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-CVT-NEXT: xtn v0.2s, v0.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: ustest_f16i32_mm: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzs x9, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov v2.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b -; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b -; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-NEXT: xtn v0.2s, v0.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: ustest_f16i32_mm: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -1156,12 +1019,8 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i16_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: uqxtn v0.4h, v0.4s ; CHECK-NEXT: ret 
entry: %conv = fptosi <4 x float> %x to <4 x i32> @@ -1218,21 +1077,22 @@ } define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { -; CHECK-LABEL: ustest_f16i16_mm: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s -; CHECK-NEXT: xtn v0.4h, v1.4s -; CHECK-NEXT: xtn2 v0.8h, v2.4s -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: ustest_f16i16_mm: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: ustest_f16i16_mm: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -198,24 +198,9 @@ ; ; FULL-LABEL: ustest_f64i32: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, r1, d0 -; FULL-NEXT: bl __aeabi_d2lz -; FULL-NEXT: subs.w r2, r0, #-1 -; FULL-NEXT: sbcs r2, r1, #0 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq.w r0, #-1 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: 
sbcs.w r1, r2, r1 -; FULL-NEXT: cset r1, lt -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r1, ne -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i64 %0 = icmp slt i64 %conv, 4294967295 @@ -369,51 +354,11 @@ ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; -; VFP2-LABEL: ustest_f32i32: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: subs.w r3, r0, #-1 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: sbcs r3, r1, #0 -; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r3, #1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: ite ne -; VFP2-NEXT: movne r3, r1 -; VFP2-NEXT: moveq.w r0, #-1 -; VFP2-NEXT: rsbs r1, r0, #0 -; VFP2-NEXT: sbcs.w r1, r2, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 -; VFP2-NEXT: pop {r7, pc} -; -; FULL-LABEL: ustest_f32i32: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: bl __aeabi_f2lz -; FULL-NEXT: subs.w r2, r0, #-1 -; FULL-NEXT: sbcs r2, r1, #0 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq.w r0, #-1 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: sbcs.w r1, r2, r1 -; FULL-NEXT: cset r1, lt -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r1, ne -; FULL-NEXT: pop {r7, pc} +; VFP-LABEL: ustest_f32i32: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.u32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: bx lr entry: %conv = fptosi float %x to i64 %0 = icmp slt i64 %conv, 4294967295 @@ -604,47 +549,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: subs.w r3, r0, #-1 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: sbcs r3, 
r1, #0 -; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r3, #1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: ite ne -; VFP2-NEXT: movne r3, r1 -; VFP2-NEXT: moveq.w r0, #-1 -; VFP2-NEXT: rsbs r1, r0, #0 -; VFP2-NEXT: sbcs.w r1, r2, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i32: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfdi -; FULL-NEXT: subs.w r2, r0, #-1 -; FULL-NEXT: sbcs r2, r1, #0 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq.w r0, #-1 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: sbcs.w r1, r2, r1 -; FULL-NEXT: cset r1, lt -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r1, ne -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i64 %0 = icmp slt i64 %conv, 4294967295 @@ -2411,21 +2325,9 @@ ; ; FULL-LABEL: ustest_f64i32_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, r1, d0 -; FULL-NEXT: bl __aeabi_d2lz -; FULL-NEXT: mov r2, r0 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl.w r2, #-1 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: csel r1, r1, r2, mi -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r1, r0, r2, gt -; FULL-NEXT: csel r0, r0, r1, eq -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295) @@ -2582,45 +2484,11 @@ ; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: pop 
{r4, pc} ; -; VFP2-LABEL: ustest_f32i32_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: mov r2, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl.w r2, #-1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, r2 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r2, r0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 -; VFP2-NEXT: pop {r7, pc} -; -; FULL-LABEL: ustest_f32i32_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: bl __aeabi_f2lz -; FULL-NEXT: mov r2, r0 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl.w r2, #-1 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: csel r1, r1, r2, mi -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r1, r0, r2, gt -; FULL-NEXT: csel r0, r0, r1, eq -; FULL-NEXT: pop {r7, pc} +; VFP-LABEL: ustest_f32i32_mm: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.u32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: bx lr entry: %conv = fptosi float %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295) @@ -2813,41 +2681,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: mov r2, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl.w r2, #-1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, r2 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r2, r0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r0, r2 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i32_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; 
FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfdi -; FULL-NEXT: mov r2, r0 -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: it pl -; FULL-NEXT: movpl.w r2, #-1 -; FULL-NEXT: csel r0, r0, r2, eq -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: csel r1, r1, r2, mi -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r1, r0, r2, gt -; FULL-NEXT: csel r0, r0, r1, eq -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -175,40 +175,40 @@ } define i32 @ustest_f64i32(double %x) { -; RV32-LABEL: ustest_f64i32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixdfdi@plt -; RV32-NEXT: beqz a1, .LBB2_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a2, a1, 0 -; RV32-NEXT: beqz a2, .LBB2_3 -; RV32-NEXT: j .LBB2_4 -; RV32-NEXT: .LBB2_2: -; RV32-NEXT: addi a2, a0, 1 -; RV32-NEXT: snez a2, a2 -; RV32-NEXT: bnez a2, .LBB2_4 -; RV32-NEXT: .LBB2_3: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB2_4: # %entry -; RV32-NEXT: beqz a1, .LBB2_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: sgtz a1, a1 -; RV32-NEXT: beqz a1, .LBB2_7 -; RV32-NEXT: j .LBB2_8 -; RV32-NEXT: .LBB2_6: -; RV32-NEXT: snez a1, a0 -; RV32-NEXT: bnez a1, .LBB2_8 -; RV32-NEXT: .LBB2_7: # %entry -; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB2_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32IF-LABEL: ustest_f64i32: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: .cfi_def_cfa_offset 16 +; 
RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixdfdi@plt +; RV32IF-NEXT: beqz a1, .LBB2_2 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: slti a2, a1, 0 +; RV32IF-NEXT: beqz a2, .LBB2_3 +; RV32IF-NEXT: j .LBB2_4 +; RV32IF-NEXT: .LBB2_2: +; RV32IF-NEXT: addi a2, a0, 1 +; RV32IF-NEXT: snez a2, a2 +; RV32IF-NEXT: bnez a2, .LBB2_4 +; RV32IF-NEXT: .LBB2_3: # %entry +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: .LBB2_4: # %entry +; RV32IF-NEXT: beqz a1, .LBB2_6 +; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: sgtz a1, a1 +; RV32IF-NEXT: beqz a1, .LBB2_7 +; RV32IF-NEXT: j .LBB2_8 +; RV32IF-NEXT: .LBB2_6: +; RV32IF-NEXT: snez a1, a0 +; RV32IF-NEXT: bnez a1, .LBB2_8 +; RV32IF-NEXT: .LBB2_7: # %entry +; RV32IF-NEXT: li a0, 0 +; RV32IF-NEXT: .LBB2_8: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: ustest_f64i32: ; RV64IF: # %bb.0: # %entry @@ -231,6 +231,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: ustest_f64i32: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB2_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB2_2: +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: ustest_f64i32: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -344,37 +362,14 @@ define i32 @ustest_f32i32(float %x) { ; RV32-LABEL: ustest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call 
__fixsfdi@plt -; RV32-NEXT: beqz a1, .LBB5_2 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB5_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a2, a1, 0 -; RV32-NEXT: beqz a2, .LBB5_3 -; RV32-NEXT: j .LBB5_4 -; RV32-NEXT: .LBB5_2: -; RV32-NEXT: addi a2, a0, 1 -; RV32-NEXT: snez a2, a2 -; RV32-NEXT: bnez a2, .LBB5_4 -; RV32-NEXT: .LBB5_3: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB5_4: # %entry -; RV32-NEXT: beqz a1, .LBB5_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: sgtz a1, a1 -; RV32-NEXT: beqz a1, .LBB5_7 -; RV32-NEXT: j .LBB5_8 -; RV32-NEXT: .LBB5_6: -; RV32-NEXT: snez a1, a0 -; RV32-NEXT: bnez a1, .LBB5_8 -; RV32-NEXT: .LBB5_7: # %entry ; RV32-NEXT: li a0, 0 -; RV32-NEXT: .LBB5_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; RV32-NEXT: .LBB5_2: +; RV32-NEXT: fcvt.wu.s a0, ft0, rtz ; RV32-NEXT: ret ; ; RV64-LABEL: ustest_f32i32: @@ -2128,44 +2123,44 @@ } define i32 @ustest_f64i32_mm(double %x) { -; RV32-LABEL: ustest_f64i32_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixdfdi@plt -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgez a1, .LBB29_7 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bnez a1, .LBB29_8 -; RV32-NEXT: .LBB29_2: # %entry -; RV32-NEXT: bgez a1, .LBB29_9 -; RV32-NEXT: .LBB29_3: # %entry -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: blez a1, .LBB29_10 -; RV32-NEXT: .LBB29_4: # %entry -; RV32-NEXT: beqz a1, .LBB29_6 -; RV32-NEXT: .LBB29_5: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB29_6: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; RV32-NEXT: .LBB29_7: # %entry -; RV32-NEXT: li a2, -1 -; RV32-NEXT: beqz a1, .LBB29_2 -; RV32-NEXT: .LBB29_8: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltz 
a1, .LBB29_3 -; RV32-NEXT: .LBB29_9: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgtz a1, .LBB29_4 -; RV32-NEXT: .LBB29_10: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bnez a1, .LBB29_5 -; RV32-NEXT: j .LBB29_6 +; RV32IF-LABEL: ustest_f64i32_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: .cfi_def_cfa_offset 16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixdfdi@plt +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bgez a1, .LBB29_7 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: bnez a1, .LBB29_8 +; RV32IF-NEXT: .LBB29_2: # %entry +; RV32IF-NEXT: bgez a1, .LBB29_9 +; RV32IF-NEXT: .LBB29_3: # %entry +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: blez a1, .LBB29_10 +; RV32IF-NEXT: .LBB29_4: # %entry +; RV32IF-NEXT: beqz a1, .LBB29_6 +; RV32IF-NEXT: .LBB29_5: # %entry +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: .LBB29_6: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB29_7: # %entry +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: beqz a1, .LBB29_2 +; RV32IF-NEXT: .LBB29_8: # %entry +; RV32IF-NEXT: mv a0, a2 +; RV32IF-NEXT: bltz a1, .LBB29_3 +; RV32IF-NEXT: .LBB29_9: # %entry +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: mv a2, a0 +; RV32IF-NEXT: bgtz a1, .LBB29_4 +; RV32IF-NEXT: .LBB29_10: # %entry +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: bnez a1, .LBB29_5 +; RV32IF-NEXT: j .LBB29_6 ; ; RV64IF-LABEL: ustest_f64i32_mm: ; RV64IF: # %bb.0: # %entry @@ -2188,6 +2183,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: ustest_f64i32_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB29_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: li a0, 0 +; 
RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB29_2: +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: ustest_f64i32_mm: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -2288,42 +2301,15 @@ define i32 @ustest_f32i32_mm(float %x) { ; RV32-LABEL: ustest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgez a1, .LBB32_7 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB32_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bnez a1, .LBB32_8 -; RV32-NEXT: .LBB32_2: # %entry -; RV32-NEXT: bgez a1, .LBB32_9 -; RV32-NEXT: .LBB32_3: # %entry -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: blez a1, .LBB32_10 -; RV32-NEXT: .LBB32_4: # %entry -; RV32-NEXT: beqz a1, .LBB32_6 -; RV32-NEXT: .LBB32_5: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB32_6: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret +; RV32-NEXT: .LBB32_2: +; RV32-NEXT: fcvt.wu.s a0, ft0, rtz ; RV32-NEXT: ret -; RV32-NEXT: .LBB32_7: # %entry -; RV32-NEXT: li a2, -1 -; RV32-NEXT: beqz a1, .LBB32_2 -; RV32-NEXT: .LBB32_8: # %entry -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltz a1, .LBB32_3 -; RV32-NEXT: .LBB32_9: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bgtz a1, .LBB32_4 -; RV32-NEXT: .LBB32_10: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: bnez a1, .LBB32_5 -; RV32-NEXT: j .LBB32_6 ; ; RV64-LABEL: ustest_f32i32_mm: ; RV64: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ 
-274,101 +274,8 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.i32 q6, #0x0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r7, r1 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r7, r3 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r1, r0, #8, #8 -; CHECK-NEXT: vmov r0, r4, d8 -; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vpsel q7, q0, q6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, 
r6, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r7, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: rsbs r1, r2, #0 -; CHECK-NEXT: sbcs.w r1, r7, r3 -; CHECK-NEXT: bfi r7, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r7, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r7 -; CHECK-NEXT: vpsel q0, q0, q6 -; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s28 -; CHECK-NEXT: vmov.f32 s3, s30 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptosi <4 x float> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -1635,101 +1542,8 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: mov.w 
r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.i32 q6, #0x0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r7, r1 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r7, r3 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r1, r0, #8, #8 -; CHECK-NEXT: vmov r0, r4, d8 -; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: vpsel q7, q0, q6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r7, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: rsbs r1, r2, #0 -; CHECK-NEXT: sbcs.w r1, r7, r3 -; CHECK-NEXT: bfi 
r7, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r7, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r7 -; CHECK-NEXT: vpsel q0, q0, q6 -; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s28 -; CHECK-NEXT: vmov.f32 s3, s30 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -46,23 +46,9 @@ define i32 @ustest_f64i32(double %x) { ; CHECK-LABEL: ustest_f64i32: ; CHECK: .functype ustest_f64i32 (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i64 @@ -117,23 +103,9 @@ define i32 @ustest_f32i32(float %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: .functype ustest_f32i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; 
CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i64 @@ -192,25 +164,11 @@ define i32 @ustest_f16i32(half %x) { ; CHECK-LABEL: ustest_f16i32: ; CHECK: .functype ustest_f16i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -874,23 +832,9 @@ define i32 @ustest_f64i32_mm(double %x) { ; CHECK-LABEL: ustest_f64i32_mm: ; CHECK: .functype ustest_f64i32_mm (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i64 @@ -940,23 +884,9 @@ define i32 @ustest_f32i32_mm(float %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: .functype ustest_f32i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; 
CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i64 @@ -1010,25 +940,11 @@ define i32 @ustest_f16i32_mm(half %x) { ; CHECK-LABEL: ustest_f16i32_mm: ; CHECK: .functype ustest_f16i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -213,51 +213,9 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: .functype ustest_f32i32 (v128) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 1 -; CHECK-NEXT: i64.trunc_sat_f32_s 
-; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -387,16 +345,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32: ; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -404,44 +353,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: 
i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -1878,51 +1804,9 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: .functype ustest_f32i32_mm (v128) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 
1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -2047,16 +1931,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32_mm: ; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -2064,44 +1939,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: 
i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64>