diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3354,6 +3354,9 @@ bool UseScalarMinMax = VT.isVector() && !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + // ValueTracking's select pattern matching does not account for -0.0, + // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that + // -0.0 is less than +0.0. Value *LHS, *RHS; auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); ISD::NodeType Opc = ISD::DELETED_NODE; @@ -3365,34 +3368,26 @@ case SPF_FMINNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break; + case SPNB_RETURNS_NAN: break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; - case SPNB_RETURNS_ANY: { - if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + case SPNB_RETURNS_ANY: + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) Opc = ISD::FMINNUM; - else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)) - Opc = ISD::FMINIMUM; - else if (UseScalarMinMax) - Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? - ISD::FMINNUM : ISD::FMINIMUM; break; } - } break; case SPF_FMAXNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); - case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break; + case SPNB_RETURNS_NAN: break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: - - if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()))) Opc = ISD::FMAXNUM; - else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)) - Opc = ISD::FMAXIMUM; - else if (UseScalarMinMax) - Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? - ISD::FMAXNUM : ISD::FMAXIMUM; break; } break; diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll --- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll @@ -7,7 +7,8 @@ %longer = fpext float %val to double ret double %longer -; CHECK: fmax s +; CHECK: fcmp +; CHECK: fcsel } define double @test_cross(float %in) { @@ -17,7 +18,8 @@ %longer = fpext float %val to double ret double %longer -; CHECK: fmin s +; CHECK: fcmp +; CHECK: fcsel } ; Same as previous, but with ordered comparison; diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -29,13 +29,15 @@ ; CHECK-NEXT: ldp q1, q2, [x13, #-16] ; CHECK-NEXT: subs x14, x14, #8 ; CHECK-NEXT: add x13, x13, #32 -; CHECK-NEXT: fcmlt v3.4s, v1.4s, #0.0 -; CHECK-NEXT: fmin v1.4s, v1.4s, v0.4s -; CHECK-NEXT: fcmlt v4.4s, v2.4s, #0.0 -; CHECK-NEXT: fmin v2.4s, v2.4s, v0.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v3.16b +; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s +; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fcmlt v6.4s, v2.4s, #0.0 +; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b +; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b +; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b ; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: bic v2.16b, v2.16b, v4.16b +; CHECK-NEXT: bic v2.16b, v2.16b, v6.16b ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s @@ -56,8 +58,9 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr s1, [x8], #4 ; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fcsel s2, s2, s1, gt ; CHECK-NEXT: fcmp s1, #0.0 -; CHECK-NEXT: fmin s2, s1, s2 ; CHECK-NEXT: fcsel s1, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 ; CHECK-NEXT: fcvtzs w12, s1 @@ -168,16 +171,18 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp s2, s3, [x8], #8 ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 +; CHECK-NEXT: fcmp s2, s1 +; CHECK-NEXT: fcsel s4, s1, s2, gt ; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fmin s1, s3, s1 ; CHECK-NEXT: fcsel s2, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcsel s1, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: fcvtzs w12, s2 ; CHECK-NEXT: fcsel s1, s0, s1, mi +; CHECK-NEXT: strb w12, [x9] ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w12, s2 ; CHECK-NEXT: fcvtzs w13, s1 -; CHECK-NEXT: strb w12, [x9] ; CHECK-NEXT: strb w13, [x9, #1] ; CHECK-NEXT: add x9, x9, #2 ; CHECK-NEXT: b.ne .LBB1_6 @@ -194,14 +199,16 @@ ; CHECK-NEXT: .LBB1_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 -; CHECK-NEXT: fcmlt v3.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s ; CHECK-NEXT: subs x12, x12, #4 -; CHECK-NEXT: fmin v4.4s, v1.4s, v0.4s -; CHECK-NEXT: fcmlt v5.4s, v2.4s, #0.0 -; CHECK-NEXT: fmin v1.4s, v2.4s, v0.4s -; CHECK-NEXT: bic v2.16b, v4.16b, v3.16b +; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b ; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b +; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s @@ -339,23 +346,26 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp s2, s3, [x8] ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 +; CHECK-NEXT: fcmp s2, s1 +; CHECK-NEXT: fcsel s4, s1, s2, gt ; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: ldr s2, [x8, #8] -; CHECK-NEXT: fmin s5, s3, s1 +; CHECK-NEXT: fcsel s2, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcsel s4, s1, s3, gt +; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: ldr s3, [x8, #8] +; CHECK-NEXT: fcvtzs w12, s2 ; CHECK-NEXT: add x8, x8, #12 ; CHECK-NEXT: fcsel s4, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: strb w12, [x9] +; CHECK-NEXT: fcsel s1, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fmin s1, s2, s1 -; CHECK-NEXT: fcsel s3, s0, s5, mi -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcvtzs w12, s4 +; CHECK-NEXT: fcvtzs w13, s4 ; CHECK-NEXT: fcsel s1, s0, s1, mi +; CHECK-NEXT: strb w13, [x9, #1] ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w13, s3 -; CHECK-NEXT: strb w12, [x9] ; CHECK-NEXT: fcvtzs w14, s1 -; CHECK-NEXT: strb w13, [x9, #1] ; CHECK-NEXT: strb w14, [x9, #2] ; CHECK-NEXT: add x9, x9, #3 ; CHECK-NEXT: b.ne .LBB2_6 @@ -375,16 +385,20 @@ ; CHECK-NEXT: .LBB2_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 -; CHECK-NEXT: fcmlt v5.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v5.4s, v2.4s, v1.4s ; CHECK-NEXT: add x13, x0, #8 -; CHECK-NEXT: fmin v6.4s, v2.4s, v1.4s +; CHECK-NEXT: fcmgt v7.4s, v3.4s, v1.4s ; CHECK-NEXT: subs x12, x12, #4 -; CHECK-NEXT: fcmlt v7.4s, v3.4s, #0.0 -; CHECK-NEXT: fmin v16.4s, v3.4s, v1.4s -; CHECK-NEXT: fmin v2.4s, v4.4s, v1.4s -; CHECK-NEXT: bic v5.16b, v6.16b, v5.16b +; CHECK-NEXT: fcmgt v17.4s, v4.4s, v1.4s +; CHECK-NEXT: fcmlt v6.4s, v2.4s, #0.0 +; CHECK-NEXT: bsl v5.16b, v1.16b, v2.16b +; CHECK-NEXT: fcmlt v16.4s, v3.4s, #0.0 +; CHECK-NEXT: bsl v7.16b, v1.16b, v3.16b +; CHECK-NEXT: mov v2.16b, v17.16b +; CHECK-NEXT: bic v5.16b, v5.16b, v6.16b ; CHECK-NEXT: fcmlt v6.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v3.16b, v16.16b, v7.16b +; CHECK-NEXT: bsl v2.16b, v1.16b, v4.16b +; CHECK-NEXT: bic v3.16b, v7.16b, v16.16b ; CHECK-NEXT: fcvtzs v4.4s, v5.4s ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: bic v2.16b, v2.16b, v6.16b @@ -544,28 +558,32 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp s2, s3, [x8] ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 +; CHECK-NEXT: fcmp s2, s1 +; CHECK-NEXT: fcsel s4, s1, s2, gt ; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fmin s2, s3, s1 -; CHECK-NEXT: fcsel s4, s0, s4, mi +; CHECK-NEXT: fcsel s2, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcsel s4, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: ldp s5, s3, [x8, #8] +; CHECK-NEXT: ldp s3, s5, [x8, #8] +; CHECK-NEXT: fcvtzs w12, s2 ; CHECK-NEXT: add x8, x8, #16 -; CHECK-NEXT: fcsel s2, s0, s2, mi -; CHECK-NEXT: fcvtzs w12, s4 -; CHECK-NEXT: fmin s6, s5, s1 -; CHECK-NEXT: fcmp s5, #0.0 -; CHECK-NEXT: fmin s1, s3, s1 -; CHECK-NEXT: fcvtzs w13, s2 +; CHECK-NEXT: fcsel s4, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 ; CHECK-NEXT: strb w12, [x9] -; CHECK-NEXT: fcsel s5, s0, s6, mi +; CHECK-NEXT: fcsel s6, s1, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: fcvtzs w13, s4 +; CHECK-NEXT: fcsel s3, s0, s6, mi +; CHECK-NEXT: fcmp s5, s1 ; CHECK-NEXT: strb w13, [x9, #1] +; CHECK-NEXT: fcsel s1, s1, s5, gt +; CHECK-NEXT: fcmp s5, #0.0 +; CHECK-NEXT: fcvtzs w14, s3 ; CHECK-NEXT: fcsel s1, s0, s1, mi +; CHECK-NEXT: strb w14, [x9, #2] ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w14, s5 ; CHECK-NEXT: fcvtzs w12, s1 -; CHECK-NEXT: strb w14, [x9, #2] ; CHECK-NEXT: strb w12, [x9, #3] ; CHECK-NEXT: add x9, x9, #4 ; CHECK-NEXT: b.ne .LBB3_6 @@ -584,19 +602,24 @@ ; CHECK-NEXT: .LBB3_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 -; CHECK-NEXT: fcmlt v6.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v6.4s, v2.4s, v1.4s ; CHECK-NEXT: subs x12, x12, #4 -; CHECK-NEXT: fmin v7.4s, v2.4s, v1.4s -; CHECK-NEXT: fcmlt v16.4s, v3.4s, #0.0 -; CHECK-NEXT: fmin v17.4s, v3.4s, v1.4s -; CHECK-NEXT: fmin v18.4s, v4.4s, v1.4s -; CHECK-NEXT: bic v6.16b, v7.16b, v6.16b -; CHECK-NEXT: fcmlt v7.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v16.16b, v17.16b, v16.16b +; CHECK-NEXT: fcmlt v7.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v16.4s, v3.4s, v1.4s +; CHECK-NEXT: fcmgt v19.4s, v4.4s, v1.4s +; CHECK-NEXT: bsl v6.16b, v1.16b, v2.16b +; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 +; CHECK-NEXT: bsl v16.16b, v1.16b, v3.16b +; CHECK-NEXT: fcmlt v18.4s, v4.4s, #0.0 +; CHECK-NEXT: bic v6.16b, v6.16b, v7.16b +; CHECK-NEXT: fcmgt v7.4s, v5.4s, v1.4s +; CHECK-NEXT: bsl v19.16b, v1.16b, v4.16b +; CHECK-NEXT: bic v16.16b, v16.16b, v17.16b ; CHECK-NEXT: fcmlt v17.4s, v5.4s, #0.0 -; CHECK-NEXT: fmin v2.4s, v5.4s, v1.4s +; CHECK-NEXT: mov v2.16b, v7.16b +; CHECK-NEXT: bsl v2.16b, v1.16b, v5.16b ; CHECK-NEXT: fcvtzs v4.4s, v6.4s -; CHECK-NEXT: bic v3.16b, v18.16b, v7.16b +; CHECK-NEXT: bic v3.16b, v19.16b, v18.16b ; CHECK-NEXT: fcvtzs v5.4s, v16.4s ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: bic v2.16b, v2.16b, v17.16b diff --git a/llvm/test/CodeGen/ARM/neon_minmax.ll b/llvm/test/CodeGen/ARM/neon_minmax.ll --- a/llvm/test/CodeGen/ARM/neon_minmax.ll +++ b/llvm/test/CodeGen/ARM/neon_minmax.ll @@ -3,7 +3,7 @@ define float @fmin_ole(float %x) nounwind { ;CHECK-LABEL: fmin_ole: -;CHECK: vmin.f32 +;CHECK-NOT: vmin.f32 %cond = fcmp ole float 1.0, %x %min1 = select i1 %cond, float 1.0, float %x ret float %min1 @@ -19,7 +19,7 @@ define float @fmin_ult(float %x) nounwind { ;CHECK-LABEL: fmin_ult: -;CHECK: vmin.f32 +;CHECK-NOT: vmin.f32 %cond = fcmp ult float %x, 1.0 %min1 = select i1 %cond, float %x, float 1.0 ret float %min1 @@ -27,7 +27,7 @@ define float @fmax_ogt(float %x) nounwind { ;CHECK-LABEL: fmax_ogt: -;CHECK: vmax.f32 +;CHECK-NOT: vmax.f32 %cond = fcmp ogt float 1.0, %x %max1 = select i1 %cond, float 1.0, float %x ret float %max1 @@ -35,7 +35,7 @@ define float @fmax_uge(float %x) nounwind { ;CHECK-LABEL: fmax_uge: -;CHECK: vmax.f32 +;CHECK-NOT: vmax.f32 %cond = fcmp uge float %x, 1.0 %max1 = select i1 %cond, float %x, float 1.0 ret float %max1 @@ -51,7 +51,7 @@ define float @fmax_olt_reverse(float %x) nounwind { ;CHECK-LABEL: fmax_olt_reverse: -;CHECK: vmax.f32 +;CHECK-NOT: vmax.f32 %cond = fcmp olt float %x, 1.0 %max1 = select i1 %cond, float 1.0, float %x ret float %max1 @@ -59,7 +59,7 @@ define float @fmax_ule_reverse(float %x) nounwind { ;CHECK-LABEL: fmax_ule_reverse: -;CHECK: vmax.f32 +;CHECK-NOT: vmax.f32 %cond = fcmp ult float 1.0, %x %max1 = select i1 %cond, float %x, float 1.0 ret float %max1 @@ -67,7 +67,7 @@ define float @fmin_oge_reverse(float %x) nounwind { ;CHECK-LABEL: fmin_oge_reverse: -;CHECK: vmin.f32 +;CHECK-NOT: vmin.f32 %cond = fcmp oge float %x, 1.0 %min1 = select i1 %cond, float 1.0, float %x ret float %min1 @@ -75,7 +75,7 @@ define float @fmin_ugt_reverse(float %x) nounwind { ;CHECK-LABEL: fmin_ugt_reverse: -;CHECK: vmin.f32 +;CHECK-NOT: vmin.f32 %cond = fcmp ugt float 1.0, %x %min1 = select i1 %cond, float %x, float 1.0 ret float %min1 diff --git a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll --- a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=nvptx | FileCheck %s --check-prefixes=CHECK,CHECK-NONAN -; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK-NAN +; RUN: llc < %s -march=nvptx | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK ; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %} ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %} @@ -7,9 +7,8 @@ ; CHECK-LABEL: minimum_half define half @minimum_half(half %a) #0 { - ; CHECK-NONAN: setp - ; CHECK-NONAN: selp.b16 - ; CHECK-NAN: min.NaN.f16 + ; CHECK: setp + ; CHECK: selp.b16 %p = fcmp ult half %a, 0.0 %x = select i1 %p, half %a, half 0.0 ret half %x @@ -17,9 +16,8 @@ ; CHECK-LABEL: minimum_float define float @minimum_float(float %a) #0 { - ; CHECK-NONAN: setp - ; CHECK-NONAN: selp.f32 - ; CHECK-NAN: min.NaN.f32 + ; CHECK: setp + ; CHECK: selp.f32 %p = fcmp ult float %a, 0.0 %x = select i1 %p, float %a, float 0.0 ret float %x @@ -36,11 +34,9 @@ ; CHECK-LABEL: minimum_v2half define <2 x half> @minimum_v2half(<2 x half> %a) #0 { - ; CHECK-NONAN-DAG: setp - ; CHECK-NONAN-DAG: setp - ; CHECK-NONAN-DAG: selp.b16 - ; CHECK-NONAN-DAG: selp.b16 - ; CHECK-NAN: min.NaN.f16x2 + ; CHECK-DAG: setp + ; CHECK-DAG: selp.b16 + ; CHECK-DAG: selp.b16 %p = fcmp ult <2 x half> %a, zeroinitializer %x = select <2 x i1> %p, <2 x half> %a, <2 x half> zeroinitializer ret <2 x half> %x @@ -50,9 +46,8 @@ ; CHECK-LABEL: maximum_half define half @maximum_half(half %a) #0 { - ; CHECK-NONAN: setp - ; CHECK-NONAN: selp.b16 - ; CHECK-NAN: max.NaN.f16 + ; CHECK: setp + ; CHECK: selp.b16 %p = fcmp ugt half %a, 0.0 %x = select i1 %p, half %a, half 0.0 ret half %x @@ -60,9 +55,8 @@ ; CHECK-LABEL: maximum_float define float @maximum_float(float %a) #0 { - ; CHECK-NONAN: setp - ; CHECK-NONAN: selp.f32 - ; CHECK-NAN: max.NaN.f32 + ; CHECK: setp + ; CHECK: selp.f32 %p = fcmp ugt float %a, 0.0 %x = select i1 %p, float %a, float 0.0 ret float %x @@ -79,11 +73,9 @@ ; CHECK-LABEL: maximum_v2half define <2 x half> @maximum_v2half(<2 x half> %a) #0 { - ; CHECK-NONAN-DAG: setp - ; CHECK-NONAN-DAG: setp - ; CHECK-NONAN-DAG: selp.b16 - ; CHECK-NONAN-DAG: selp.b16 - ; CHECK-NAN: max.NaN.f16x2 + ; CHECK-DAG: setp + ; CHECK-DAG: selp.b16 + ; CHECK-DAG: selp.b16 %p = fcmp ugt <2 x half> %a, zeroinitializer %x = select <2 x i1> %p, <2 x half> %a, <2 x half> zeroinitializer ret <2 x half> %x diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll --- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll @@ -59,8 +59,7 @@ ; Test a f64 constant compare/select resulting in maximum. define double @f5(double %dummy, double %val) { ; CHECK-LABEL: f5: -; CHECK: lzdr [[REG:%f[0-9]+]] -; CHECK: wfmaxdb %f0, %f2, [[REG]], 1 +; CHECK: ltdbr %f0, %f2 ; CHECK: br %r14 %cmp = fcmp ugt double %val, 0.0 %ret = select i1 %cmp, double %val, double 0.0 @@ -128,8 +127,8 @@ ; Test a f32 constant compare/select resulting in maximum. define float @f15(float %dummy, float %val) { ; CHECK-LABEL: f15: -; CHECK: lzer [[REG:%f[0-9]+]] -; CHECK: wfmaxsb %f0, %f2, [[REG]], 1 +; CHECK: ltebr %f1, %f2 +; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cmp = fcmp ugt float %val, 0.0 %ret = select i1 %cmp, float %val, float 0.0 @@ -221,7 +220,7 @@ ; CHECK-LABEL: f25: ; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) ; CHECK-DAG: vzero [[REG2:%v[0-9]+]] -; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1 +; CHECK: wfcxb [[REG1]], [[REG2]] ; CHECK: vst [[RES]], 0(%r3) ; CHECK: br %r14 %val = load fp128, ptr %ptr diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll --- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll @@ -45,7 +45,8 @@ define <2 x double> @f5(<2 x double> %val) { ; CHECK-LABEL: f5: ; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 1 +; CHECK-NEXT: vfchedb %v1, %v0, %v24 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 ; CHECK-NEXT: br %r14 %cmp = fcmp ugt <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer @@ -55,7 +56,8 @@ define <2 x double> @f6(<2 x double> %val) { ; CHECK-LABEL: f6: ; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmindb %v24, %v24, %v0, 1 +; CHECK-NEXT: vfchedb %v1, %v24, %v0 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 ; CHECK-NEXT: br %r14 %cmp = fcmp ult <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer @@ -65,7 +67,8 @@ define <4 x float> @f7(<4 x float> %val) { ; CHECK-LABEL: f7: ; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 1 +; CHECK-NEXT: vfchesb %v1, %v0, %v24 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 ; CHECK-NEXT: br %r14 %cmp = fcmp ugt <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer @@ -75,7 +78,8 @@ define <4 x float> @f8(<4 x float> %val) { ; CHECK-LABEL: f8: ; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfminsb %v24, %v24, %v0, 1 +; CHECK-NEXT: vfchesb %v1, %v24, %v0 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 ; CHECK-NEXT: br %r14 %cmp = fcmp ult <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll --- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll @@ -59,9 +59,8 @@ ; Test a f64 constant compare/select resulting in minimum. define double @f5(double %dummy, double %val) { ; CHECK-LABEL: f5: -; CHECK: lzdr [[REG:%f[0-9]+]] -; CHECK: wfmindb %f0, %f2, [[REG]], 1 -; CHECK: br %r14 +; CHECK: ltdbr %f0, %f2 +; CHECK: bnher %r14 %cmp = fcmp ult double %val, 0.0 %ret = select i1 %cmp, double %val, double 0.0 ret double %ret @@ -128,9 +127,9 @@ ; Test a f32 constant compare/select resulting in minimum. define float @f15(float %dummy, float %val) { ; CHECK-LABEL: f15: -; CHECK: lzer [[REG:%f[0-9]+]] -; CHECK: wfminsb %f0, %f2, [[REG]], 1 -; CHECK: br %r14 +; CHECK: ltebr %f1, %f2 +; CHECK: ldr %f0, %f2 +; CHECK: bnher %r14 %cmp = fcmp ult float %val, 0.0 %ret = select i1 %cmp, float %val, float 0.0 ret float %ret @@ -221,7 +220,7 @@ ; CHECK-LABEL: f25: ; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2) ; CHECK-DAG: vzero [[REG2:%v[0-9]+]] -; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 1 +; CHECK: wfcxb [[REG1]], [[REG2]] ; CHECK: vst [[RES]], 0(%r3) ; CHECK: br %r14 %val = load fp128, ptr %ptr diff --git a/llvm/test/CodeGen/WebAssembly/f32.ll b/llvm/test/CodeGen/WebAssembly/f32.ll --- a/llvm/test/CodeGen/WebAssembly/f32.ll +++ b/llvm/test/CodeGen/WebAssembly/f32.ll @@ -163,27 +163,37 @@ ret float %a } +; This is not "minimum" because a -0.0 input returns +0.0. + define float @fmin32(float %x) { ; CHECK-LABEL: fmin32: ; CHECK: .functype fmin32 (f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f32.const $push0=, 0x0p0 -; CHECK-NEXT: f32.min $push1=, $pop2, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: f32.const $push3=, 0x0p0 +; CHECK-NEXT: f32.ge $push1=, $pop4, $pop3 +; CHECK-NEXT: f32.select $push2=, $pop0, $pop5, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ult float %x, 0.0 %b = select i1 %a, float %x, float 0.0 ret float %b } +; This is not "maximum" because a -0.0 input returns +0.0. + define float @fmax32(float %x) { ; CHECK-LABEL: fmax32: ; CHECK: .functype fmax32 (f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f32.const $push0=, 0x0p0 -; CHECK-NEXT: f32.max $push1=, $pop2, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: f32.const $push3=, 0x0p0 +; CHECK-NEXT: f32.le $push1=, $pop4, $pop3 +; CHECK-NEXT: f32.select $push2=, $pop0, $pop5, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ugt float %x, 0.0 %b = select i1 %a, float %x, float 0.0 ret float %b diff --git a/llvm/test/CodeGen/WebAssembly/f64.ll b/llvm/test/CodeGen/WebAssembly/f64.ll --- a/llvm/test/CodeGen/WebAssembly/f64.ll +++ b/llvm/test/CodeGen/WebAssembly/f64.ll @@ -163,27 +163,37 @@ ret double %a } +; This is not "minimum" because a -0.0 input returns +0.0. + define double @fmin64(double %x) { ; CHECK-LABEL: fmin64: ; CHECK: .functype fmin64 (f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f64.const $push0=, 0x0p0 -; CHECK-NEXT: f64.min $push1=, $pop2, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: f64.const $push3=, 0x0p0 +; CHECK-NEXT: f64.ge $push1=, $pop4, $pop3 +; CHECK-NEXT: f64.select $push2=, $pop0, $pop5, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ult double %x, 0.0 %b = select i1 %a, double %x, double 0.0 ret double %b } +; This is not "maximum" because a -0.0 input returns +0.0. + define double @fmax64(double %x) { ; CHECK-LABEL: fmax64: ; CHECK: .functype fmax64 (f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f64.const $push0=, 0x0p0 -; CHECK-NEXT: f64.max $push1=, $pop2, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: f64.const $push3=, 0x0p0 +; CHECK-NEXT: f64.le $push1=, $pop4, $pop3 +; CHECK-NEXT: f64.select $push2=, $pop0, $pop5, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ugt double %x, 0.0 %b = select i1 %a, double %x, double 0.0 ret double %b diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -13142,53 +13142,73 @@ ; SIMD128-LABEL: min_unordered_v4f32: ; SIMD128: .functype min_unordered_v4f32 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f32x4.min $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f32x4.gt $push0=, $0, $1 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: min_unordered_v4f32: ; SIMD128-FAST: .functype min_unordered_v4f32 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f32x4.gt $push1=, $0, $1 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: min_unordered_v4f32: ; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push1=, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push2=, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push3=, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push4=, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push1=, $3, $pop17 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push3=, $2, $pop15 +; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push5=, $1, $pop13 +; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push7=, $4, $pop11 +; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: ; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop15 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13201,52 +13221,68 @@ ; SIMD128: .functype max_unordered_v4f32 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f32x4.max $push1=, $0, $pop0 +; SIMD128-NEXT: f32x4.pmax $push1=, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: max_unordered_v4f32: ; SIMD128-FAST: .functype max_unordered_v4f32 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: ; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: max_unordered_v4f32: ; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push1=, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push2=, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push3=, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push4=, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push1=, $3, $pop17 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push3=, $2, $pop15 +; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push5=, $1, $pop13 +; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push7=, $4, $pop11 +; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: ; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop15 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13258,53 +13294,73 @@ ; SIMD128-LABEL: min_ordered_v4f32: ; SIMD128: .functype min_ordered_v4f32 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f32x4.min $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f32x4.le $push0=, $1, $0 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: min_ordered_v4f32: ; SIMD128-FAST: .functype min_ordered_v4f32 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f32x4.le $push1=, $1, $0 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: min_ordered_v4f32: ; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push1=, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push2=, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push3=, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.min $push4=, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push1=, $3, $pop17 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push3=, $2, $pop15 +; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push5=, $1, $pop13 +; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push7=, $4, $pop11 +; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: ; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop15 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> , %x %a = select <4 x i1> %cmps, @@ -13316,53 +13372,73 @@ ; SIMD128-LABEL: max_ordered_v4f32: ; SIMD128: .functype max_ordered_v4f32 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f32x4.max $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f32x4.ge $push0=, $1, $0 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: max_ordered_v4f32: ; SIMD128-FAST: .functype max_ordered_v4f32 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f32x4.ge $push1=, $1, $0 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: max_ordered_v4f32: ; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push1=, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push2=, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push3=, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.max $push4=, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push1=, $3, $pop17 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push3=, $2, $pop15 +; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push5=, $1, $pop13 +; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push7=, $4, $pop11 +; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: ; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop15 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> , %x %a = select <4 x i1> %cmps, @@ -14196,37 +14272,49 @@ ; SIMD128-LABEL: min_unordered_v2f64: ; SIMD128: .functype min_unordered_v2f64 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f64x2.min $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f64x2.gt $push0=, $0, $1 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: min_unordered_v2f64: ; SIMD128-FAST: .functype min_unordered_v2f64 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f64x2.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f64x2.gt $push1=, $0, $1 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: min_unordered_v2f64: ; NO-SIMD128: .functype min_unordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.min $push1=, $2, $pop0 -; NO-SIMD128-NEXT: f64.store 8($0), $pop1 -; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.min $push2=, $1, $pop3 -; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.gt $push1=, $2, $pop7 +; NO-SIMD128-NEXT: f64.select $push2=, $pop0, $2, $pop1 +; NO-SIMD128-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.gt $push3=, $1, $pop5 +; NO-SIMD128-NEXT: f64.select $push4=, $pop6, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_unordered_v2f64: ; NO-SIMD128-FAST: .functype min_unordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.min $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.min $push2=, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.gt $push1=, $1, $pop7 +; NO-SIMD128-FAST-NEXT: f64.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.gt $push3=, $2, $pop5 +; NO-SIMD128-FAST-NEXT: f64.select $push4=, $pop6, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, @@ -14239,36 +14327,44 @@ ; SIMD128: .functype max_unordered_v2f64 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f64x2.max $push1=, $0, $pop0 +; SIMD128-NEXT: f64x2.pmax $push1=, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: max_unordered_v2f64: ; SIMD128-FAST: .functype max_unordered_v2f64 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: ; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f64x2.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: f64x2.pmax $push0=, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: max_unordered_v2f64: ; NO-SIMD128: .functype max_unordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.max $push1=, $2, $pop0 -; NO-SIMD128-NEXT: f64.store 8($0), $pop1 -; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.max $push2=, $1, $pop3 -; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.lt $push1=, $2, $pop7 +; NO-SIMD128-NEXT: f64.select $push2=, $pop0, $2, $pop1 +; NO-SIMD128-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.lt $push3=, $1, $pop5 +; NO-SIMD128-NEXT: f64.select $push4=, $pop6, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_unordered_v2f64: ; NO-SIMD128-FAST: .functype max_unordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.max $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.max $push2=, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.lt $push1=, $1, $pop7 +; NO-SIMD128-FAST-NEXT: f64.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.lt $push3=, $2, $pop5 +; NO-SIMD128-FAST-NEXT: f64.select $push4=, $pop6, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, @@ -14280,37 +14376,49 @@ ; SIMD128-LABEL: min_ordered_v2f64: ; SIMD128: .functype min_ordered_v2f64 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f64x2.min $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f64x2.le $push0=, $1, $0 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: min_ordered_v2f64: ; SIMD128-FAST: .functype min_ordered_v2f64 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f64x2.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f64x2.le $push1=, $1, $0 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: min_ordered_v2f64: ; NO-SIMD128: .functype min_ordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.min $push1=, $2, $pop0 -; NO-SIMD128-NEXT: f64.store 8($0), $pop1 -; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.min $push2=, $1, $pop3 -; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.ge $push1=, $2, $pop7 +; NO-SIMD128-NEXT: f64.select $push2=, $pop0, $2, $pop1 +; NO-SIMD128-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.ge $push3=, $1, $pop5 +; NO-SIMD128-NEXT: f64.select $push4=, $pop6, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_ordered_v2f64: ; NO-SIMD128-FAST: .functype min_ordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.min $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.min $push2=, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.ge $push1=, $1, $pop7 +; NO-SIMD128-FAST-NEXT: f64.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.ge $push3=, $2, $pop5 +; NO-SIMD128-FAST-NEXT: f64.select $push4=, $pop6, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> , @@ -14322,37 +14430,49 @@ ; SIMD128-LABEL: max_ordered_v2f64: ; SIMD128: .functype max_ordered_v2f64 (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f64x2.max $push1=, $0, $pop0 +; SIMD128-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-NEXT: f64x2.ge $push0=, $1, $0 +; SIMD128-NEXT: v128.bitselect $push1=, $pop2, $0, $pop0 ; SIMD128-NEXT: return $pop1 ; ; SIMD128-FAST-LABEL: max_ordered_v2f64: ; SIMD128-FAST: .functype max_ordered_v2f64 (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 -; SIMD128-FAST-NEXT: f64x2.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: v128.const $push3=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: local.tee $push2=, $1=, $pop3 +; SIMD128-FAST-NEXT: f64x2.ge $push1=, $1, $0 +; SIMD128-FAST-NEXT: v128.bitselect $push0=, $pop2, $0, $pop1 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: max_ordered_v2f64: ; NO-SIMD128: .functype max_ordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.max $push1=, $2, $pop0 -; NO-SIMD128-NEXT: f64.store 8($0), $pop1 -; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-NEXT: f64.max $push2=, $1, $pop3 -; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.le $push1=, $2, $pop7 +; NO-SIMD128-NEXT: f64.select $push2=, $pop0, $2, $pop1 +; NO-SIMD128-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.le $push3=, $1, $pop5 +; NO-SIMD128-NEXT: f64.select $push4=, $pop6, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_ordered_v2f64: ; NO-SIMD128-FAST: .functype max_ordered_v2f64 (i32, f64, f64) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.max $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f64.max $push2=, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.le $push1=, $1, $pop7 +; NO-SIMD128-FAST-NEXT: f64.select $push2=, $pop0, $1, $pop1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: f64.const $push6=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.const $push5=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.le $push3=, $2, $pop5 +; NO-SIMD128-FAST-NEXT: f64.select $push4=, $pop6, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> ,