diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4892,6 +4892,42 @@ : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); } +static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + SelectionDAG &DAG) { + // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a + // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may + // be truncated versions of the setcc (N0/N1). + if ((N0 != N2 && + (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) || + N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT) + return SDValue(); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + ConstantSDNode *N3C = isConstOrConstSplat(N3); + if (!N1C || !N3C) + return SDValue(); + const APInt &C1 = N1C->getAPIntValue(); + const APInt &C3 = N3C->getAPIntValue(); + if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() || + C1 != C3.zextOrSelf(C1.getBitWidth())) + return SDValue(); + + unsigned BW = (C1 + 1).exactLogBase2(); + EVT FPVT = N0.getOperand(0).getValueType(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); + if (FPVT.isVector()) + NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, + FPVT.getVectorElementCount()); + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT, + FPVT, NewVT)) + return SDValue(); + + SDValue Sat = + DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0), + DAG.getValueType(NewVT.getScalarType())); + return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType()); +} + SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4934,6 +4970,9 @@ if (SDValue S = PerformMinMaxFpToSatCombine( N0, N1, N0, N1, Opcode == ISD::SMIN ?
ISD::SETLT : ISD::SETGT, DAG)) return S; + if (Opcode == ISD::UMIN) + if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG)) + return S; // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) @@ -10314,6 +10353,8 @@ if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) return S; + if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) + return S; // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. @@ -23372,6 +23413,8 @@ if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; + if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG)) + return S; return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -22,10 +22,7 @@ define i32 @utest_f64i32(double %x) { ; CHECK-LABEL: utest_f64i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csinv w0, w8, wzr, lo +; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret entry: %conv = fptoui double %x to i64 @@ -68,10 +65,7 @@ define i32 @utest_f32i32(float %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, s0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csinv w0, w8, wzr, lo +; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret entry: %conv = fptoui float %x to i64 @@ -121,18 +115,12 @@ ; CHECK-CVT-LABEL: utesth_f16i32: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov w9, #-1 -; CHECK-CVT-NEXT: fcvtzu x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csinv w0, w8, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i32: ; CHECK-FP16: // %bb.0: // 
%entry -; CHECK-FP16-NEXT: fcvtzu x8, h0 -; CHECK-FP16-NEXT: mov w9, #-1 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csinv w0, w8, wzr, lo +; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptoui half %x to i64 @@ -581,11 +569,7 @@ define i32 @utest_f64i32_mm(double %x) { ; CHECK-LABEL: utest_f64i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x0, x8, x9, lo -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret entry: %conv = fptoui double %x to i64 @@ -623,11 +607,7 @@ define i32 @utest_f32i32_mm(float %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzu x8, s0 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x0, x8, x9, lo -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret entry: %conv = fptoui float %x to i64 @@ -672,20 +652,12 @@ ; CHECK-CVT-LABEL: utesth_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov w9, #-1 -; CHECK-CVT-NEXT: fcvtzu x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x0, x8, x9, lo -; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: fcvtzu x8, h0 -; CHECK-FP16-NEXT: mov w9, #-1 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x0, x8, x9, lo -; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptoui half %x to i64 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -27,12 +27,12 @@ define <2 x i32> @utest_f64i32(<2 x 
double> %x) { ; CHECK-LABEL: utest_f64i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptoui <2 x double> %x to <2 x i64> @@ -80,18 +80,7 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d -; CHECK-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b -; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -133,57 +122,11 @@ } define <4 x i32> @utesth_f16i32(<4 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i32: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[3] -; CHECK-CVT-NEXT: mov h4, v0.h[1] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h4 -; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s3 -; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s2 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; 
CHECK-CVT-NEXT: mov v3.d[1], x9 -; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d -; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: utesth_f16i32: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzu x8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h2 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d -; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: utesth_f16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> %0 = icmp ult <4 x i64> %conv, @@ -338,17 +281,22 @@ } define <8 x i16> @utesth_f16i16(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: fcvtzu v2.4s, v2.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: utesth_f16i16: 
+; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: utesth_f16i16: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> %0 = icmp ult <8 x i32> %conv, @@ -758,12 +706,12 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptoui <2 x double> %x to <2 x i64> @@ -806,18 +754,7 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d -; CHECK-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b -; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -854,57 +791,11 @@ } define <4 x 
i32> @utesth_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-LABEL: utesth_f16i32_mm: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[3] -; CHECK-CVT-NEXT: mov h4, v0.h[1] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h4 -; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s3 -; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s2 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x9 -; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d -; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b -; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: utesth_f16i32_mm: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzu x8, h2 -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h2 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d -; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: utesth_f16i32_mm: +; 
CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -1044,17 +935,22 @@ } define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { -; CHECK-LABEL: utesth_f16i16_mm: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: fcvtzu v2.4s, v2.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: utesth_f16i16_mm: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: utesth_f16i16_mm: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> ) diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -104,17 +104,23 @@ ; SOFT-NEXT: .LBB1_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP-LABEL: utest_f64i32: -; VFP: @ %bb.0: @ %entry -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: vmov r0, r1, d0 -; VFP-NEXT: bl __aeabi_d2ulz -; VFP-NEXT: subs.w r2, r0, #-1 -; VFP-NEXT: sbcs r1, r1, #0 -; VFP-NEXT: it hs -; VFP-NEXT: movhs.w r0, #-1 -; VFP-NEXT: pop {r7, pc} +; VFP2-LABEL: 
utest_f64i32: +; VFP2: @ %bb.0: @ %entry +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, r1, d0 +; VFP2-NEXT: bl __aeabi_d2ulz +; VFP2-NEXT: subs.w r2, r0, #-1 +; VFP2-NEXT: sbcs r1, r1, #0 +; VFP2-NEXT: it hs +; VFP2-NEXT: movhs.w r0, #-1 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: utest_f64i32: +; FULL: @ %bb.0: @ %entry +; FULL-NEXT: vcvt.u32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui double %x to i64 %0 = icmp ult i64 %conv, 4294967295 @@ -289,15 +295,9 @@ ; ; VFP-LABEL: utest_f32i32: ; VFP: @ %bb.0: @ %entry -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} +; VFP-NEXT: vcvt.u32.f32 s0, s0 ; VFP-NEXT: vmov r0, s0 -; VFP-NEXT: bl __aeabi_f2ulz -; VFP-NEXT: subs.w r2, r0, #-1 -; VFP-NEXT: sbcs r1, r1, #0 -; VFP-NEXT: it hs -; VFP-NEXT: movhs.w r0, #-1 -; VFP-NEXT: pop {r7, pc} +; VFP-NEXT: bx lr entry: %conv = fptoui float %x to i64 %0 = icmp ult i64 %conv, 4294967295 @@ -466,25 +466,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2ulz -; VFP2-NEXT: subs.w r2, r0, #-1 -; VFP2-NEXT: sbcs r1, r1, #0 -; VFP2-NEXT: it hs -; VFP2-NEXT: movhs.w r0, #-1 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i32: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixunshfdi -; FULL-NEXT: subs.w r2, r0, #-1 -; FULL-NEXT: sbcs r1, r1, #0 -; FULL-NEXT: it hs -; FULL-NEXT: movhs.w r0, #-1 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui half %x to i64 %0 = icmp ult i64 %conv, 4294967295 @@ -2240,16 +2231,22 @@ ; SOFT-NEXT: .LBB28_2: @ %entry ; SOFT-NEXT: pop {r7, pc} ; -; VFP-LABEL: utest_f64i32_mm: -; VFP: @ %bb.0: @ %entry -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push 
{r7, lr} -; VFP-NEXT: vmov r0, r1, d0 -; VFP-NEXT: bl __aeabi_d2ulz -; VFP-NEXT: cmp r1, #0 -; VFP-NEXT: it ne -; VFP-NEXT: movne.w r0, #-1 -; VFP-NEXT: pop {r7, pc} +; VFP2-LABEL: utest_f64i32_mm: +; VFP2: @ %bb.0: @ %entry +; VFP2-NEXT: .save {r7, lr} +; VFP2-NEXT: push {r7, lr} +; VFP2-NEXT: vmov r0, r1, d0 +; VFP2-NEXT: bl __aeabi_d2ulz +; VFP2-NEXT: cmp r1, #0 +; VFP2-NEXT: it ne +; VFP2-NEXT: movne.w r0, #-1 +; VFP2-NEXT: pop {r7, pc} +; +; FULL-LABEL: utest_f64i32_mm: +; FULL: @ %bb.0: @ %entry +; FULL-NEXT: vcvt.u32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui double %x to i64 %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295) @@ -2429,14 +2426,9 @@ ; ; VFP-LABEL: utest_f32i32_mm: ; VFP: @ %bb.0: @ %entry -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} +; VFP-NEXT: vcvt.u32.f32 s0, s0 ; VFP-NEXT: vmov r0, s0 -; VFP-NEXT: bl __aeabi_f2ulz -; VFP-NEXT: cmp r1, #0 -; VFP-NEXT: it ne -; VFP-NEXT: movne.w r0, #-1 -; VFP-NEXT: pop {r7, pc} +; VFP-NEXT: bx lr entry: %conv = fptoui float %x to i64 %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295) @@ -2609,23 +2601,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2ulz -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne.w r0, #-1 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i32_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixunshfdi -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: it ne -; FULL-NEXT: movne.w r0, #-1 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.u32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptoui half %x to i64 %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295) diff --git 
a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -115,28 +115,28 @@ } define i32 @utest_f64i32(double %x) { -; RV32-LABEL: utest_f64i32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixunsdfdi@plt -; RV32-NEXT: beqz a1, .LBB1_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a1, .LBB1_3 -; RV32-NEXT: j .LBB1_4 -; RV32-NEXT: .LBB1_2: -; RV32-NEXT: addi a1, a0, 1 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: bnez a1, .LBB1_4 -; RV32-NEXT: .LBB1_3: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB1_4: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32IF-LABEL: utest_f64i32: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: .cfi_def_cfa_offset 16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixunsdfdi@plt +; RV32IF-NEXT: beqz a1, .LBB1_2 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a1, .LBB1_3 +; RV32IF-NEXT: j .LBB1_4 +; RV32IF-NEXT: .LBB1_2: +; RV32IF-NEXT: addi a1, a0, 1 +; RV32IF-NEXT: snez a1, a1 +; RV32IF-NEXT: bnez a1, .LBB1_4 +; RV32IF-NEXT: .LBB1_3: # %entry +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: .LBB1_4: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: utest_f64i32: ; RV64IF: # %bb.0: # %entry @@ -155,6 +155,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: utest_f64i32: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 
8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB1_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB1_2: +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: utest_f64i32: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -319,25 +337,14 @@ define i32 @utest_f32i32(float %x) { ; RV32-LABEL: utest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: beqz a1, .LBB4_2 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB4_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a1, .LBB4_3 -; RV32-NEXT: j .LBB4_4 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret ; RV32-NEXT: .LBB4_2: -; RV32-NEXT: addi a1, a0, 1 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: bnez a1, .LBB4_4 -; RV32-NEXT: .LBB4_3: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB4_4: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: fcvt.wu.s a0, ft0, rtz ; RV32-NEXT: ret ; ; RV64-LABEL: utest_f32i32: @@ -2071,20 +2078,20 @@ } define i32 @utest_f64i32_mm(double %x) { -; RV32-LABEL: utest_f64i32_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixunsdfdi@plt -; RV32-NEXT: beqz a1, .LBB28_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB28_2: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32IF-LABEL: utest_f64i32_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: 
.cfi_def_cfa_offset 16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixunsdfdi@plt +; RV32IF-NEXT: beqz a1, .LBB28_2 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: li a0, -1 +; RV32IF-NEXT: .LBB28_2: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: utest_f64i32_mm: ; RV64IF: # %bb.0: # %entry @@ -2103,6 +2110,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: utest_f64i32_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB28_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: li a0, 0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB28_2: +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: utest_f64i32_mm: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -2266,17 +2291,14 @@ define i32 @utest_f32i32_mm(float %x) { ; RV32-LABEL: utest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixunssfdi@plt -; RV32-NEXT: beqz a1, .LBB31_2 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB31_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: .LBB31_2: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret +; RV32-NEXT: .LBB31_2: +; RV32-NEXT: fcvt.wu.s a0, ft0, rtz ; RV32-NEXT: ret ; ; RV64-LABEL: utest_f32i32_mm: diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll 
b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -183,58 +183,8 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: csetm r0, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmov r0, r4, d8 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: vpsel q6, q0, q5 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: csetm r0, lo -; CHECK-NEXT: bfi r7, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r7 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s24 -; CHECK-NEXT: vmov.f32 s3, s26 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, 
pc} +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptoui <4 x float> %x to <4 x i64> %0 = icmp ult <4 x i64> %conv, @@ -1396,58 +1346,8 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: csetm r0, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: bfi r3, r0, #8, #8 -; CHECK-NEXT: vmov r0, r4, d8 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: vpsel q6, q0, q5 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: subs.w r2, r5, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 -; CHECK-NEXT: sbcs r2, r6, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: csetm r0, lo -; CHECK-NEXT: bfi r7, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r7 -; CHECK-NEXT: vpsel q0, q0, q5 -; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s24 -; CHECK-NEXT: vmov.f32 s3, s26 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; 
CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptoui <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> ) diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -23,17 +23,9 @@ define i32 @utest_f64i32(double %x) { ; CHECK-LABEL: utest_f64i32: ; CHECK: .functype utest_f64i32 (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i64 @@ -80,17 +72,9 @@ define i32 @utest_f32i32(float %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: .functype utest_f32i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui float %x to i64 @@ -139,19 +123,11 @@ define i32 @utesth_f16i32(half %x) { ; CHECK-LABEL: utesth_f16i32: ; CHECK: .functype utesth_f16i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; 
CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i64 @@ -810,17 +786,9 @@ define i32 @utest_f64i32_mm(double %x) { ; CHECK-LABEL: utest_f64i32_mm: ; CHECK: .functype utest_f64i32_mm (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i64 @@ -862,17 +830,9 @@ define i32 @utest_f32i32_mm(float %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: .functype utest_f32i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui float %x to i64 @@ -916,19 +876,11 @@ define i32 @utesth_f16i32_mm(half %x) { ; CHECK-LABEL: utesth_f16i32_mm: ; CHECK: .functype utesth_f16i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i64 diff --git 
a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -145,62 +145,9 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: .functype utest_f32i32 (v128) -> (v128) -; CHECK-NEXT: .local i64, i64, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 1 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; 
CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -267,16 +214,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-LABEL: utesth_f16i32: ; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local i64, i64, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -284,55 +222,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: 
i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -1742,62 +1646,9 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: .functype utest_f32i32_mm (v128) -> (v128) -; CHECK-NEXT: .local i64, i64, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 1 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: 
f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -1859,16 +1710,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: utesth_f16i32_mm: ; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local i64, i64, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -1876,55 +1718,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: 
i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_u -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 4294967295 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <4 x half> %x to <4 x i64>