diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2415,6 +2415,8 @@
                        ISD::STRICT_FMA,
                        ISD::FMINNUM,
                        ISD::FMAXNUM,
+                       ISD::FMINIMUM,
+                       ISD::FMAXIMUM,
                        ISD::SUB,
                        ISD::LOAD,
                        ISD::MLOAD,
@@ -53210,6 +53212,75 @@
   return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
 }
 
+static SDValue combineFMinimumFMaximum(SDNode *N, SelectionDAG &DAG,
+                                       const X86Subtarget &Subtarget) {
+  assert((N->getOpcode() == ISD::FMAXIMUM ||
+          N->getOpcode() == ISD::FMINIMUM) &&
+         "Expected FMAXIMUM or FMINIMUM opcode");
+  EVT VT = N->getValueType(0);
+  if (Subtarget.useSoftFloat())
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
+        (Subtarget.hasSSE2() && VT == MVT::f64)))
+    return SDValue();
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDLoc DL(N);
+  uint64_t SizeInBits = VT.getFixedSizeInBits();
+  APInt PreferredZero;
+  EVT IVT = MVT::getIntegerVT(SizeInBits);
+  X86ISD::NodeType MinMaxOp;
+  if (N->getOpcode() == ISD::FMAXIMUM) {
+    PreferredZero = APInt::getZero(SizeInBits);
+    MinMaxOp = X86ISD::FMAX;
+  } else {
+    PreferredZero = APInt::getSignedMinValue(SizeInBits);
+    MinMaxOp = X86ISD::FMIN;
+  }
+  EVT SetCCType =
+      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+  // We reuse the FMAX and FMIN operations, which are not commutative: they
+  // return the second operand if at least one operand is NaN or if both
+  // operands are equal (even if they are zeroes with different signs).
+  //
+  // Here we try to determine the correct operand order.
+  //
+  // We check whether either operand is NaN and, if so, return NaN. Then we
+  // check whether either operand is the preferred zero (+0.0 for fmaximum,
+  // -0.0 for fminimum) to ensure the correct zero is returned.
+  auto IsPreferredZero = [PreferredZero](SDValue Op) {
+    Op = peekThroughBitcasts(Op);
+    if (ConstantFPSDNode *CstOp = dyn_cast<ConstantFPSDNode>(Op))
+      return CstOp->getValueAPF().bitcastToAPInt() == PreferredZero;
+    if (ConstantSDNode *CstOp = dyn_cast<ConstantSDNode>(Op))
+      return CstOp->getAPIntValue() == PreferredZero;
+    return false;
+  };
+
+  SDValue MinMax;
+  if (IsPreferredZero(Op0)) {
+    MinMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
+  } else if (IsPreferredZero(Op1)) {
+    MinMax = DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+  } else {
+    SDValue IsOp0Zero = DAG.getSetCC(DL, SetCCType,
+                                     DAG.getNode(ISD::BITCAST, DL, IVT, Op0),
+                                     DAG.getConstant(PreferredZero, DL, IVT),
+                                     ISD::SETEQ);
+    SDValue NewOp0 = DAG.getSelect(DL, VT, IsOp0Zero, Op1, Op0);
+    SDValue NewOp1 = DAG.getSelect(DL, VT, IsOp0Zero, Op0, Op1);
+    MinMax = DAG.getNode(MinMaxOp, DL, VT, NewOp0, NewOp1, N->getFlags());
+  }
+  APFloat NaNValue = APFloat::getNaN(
+      VT == MVT::f32 ? APFloat::IEEEsingle() : APFloat::IEEEdouble());
+  SDValue IsNaN = DAG.getSetCC(DL, SetCCType, Op0, Op1, ISD::SETUO);
+  return DAG.getSelect(DL, VT, IsNaN, DAG.getConstantFP(NaNValue, DL, VT),
+                       MinMax);
+}
+
 static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI) {
   EVT VT = N->getValueType(0);
@@ -57217,6 +57288,8 @@
   case X86ISD::FMAX:        return combineFMinFMax(N, DAG);
   case ISD::FMINNUM:
   case ISD::FMAXNUM:        return combineFMinNumFMaxNum(N, DAG, Subtarget);
+  case ISD::FMINIMUM:
+  case ISD::FMAXIMUM:       return combineFMinimumFMaximum(N, DAG, Subtarget);
   case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P:     return combineX86INT_TO_FP(N, DAG, DCI);
   case X86ISD::CVTP2SI:
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -0,0 +1,364 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
+
+declare float @llvm.maximum.f32(float, float)
+declare double @llvm.maximum.f64(double, double)
+declare float @llvm.minimum.f32(float, float)
+declare double @llvm.minimum.f64(double, double)
+
+;
+; fmaximum
+;
+
+define float @test_fmaximum(float %x, float %y) {
+; SSE2-LABEL: test_fmaximum:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    je .LBB0_2
+; SSE2-NEXT:  # %bb.1:
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:  .LBB0_2:
+; SSE2-NEXT:    maxss %xmm3, %xmm2
+; SSE2-NEXT:    cmpunordss %xmm1, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm3
+; SSE2-NEXT:    andnps %xmm2, %xmm3
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT:    andps %xmm0, %xmm1
+; SSE2-NEXT:    orps %xmm3, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fmaximum:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm1, %xmm3
+; AVX1-NEXT:    je .LBB0_2
+; AVX1-NEXT:  # %bb.1:
+; AVX1-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm3
+; AVX1-NEXT:  .LBB0_2:
+; AVX1-NEXT:    vmaxss %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fmaximum:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    testl %eax, %eax
+; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    vmovdqa %xmm0, %xmm2
+; AVX512-NEXT:    vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512-NEXT:    vcmpunordss %xmm1, %xmm0, %k2
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT:    vmaxss %xmm1, %xmm2, %xmm0
+; AVX512-NEXT:    vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2}
+; AVX512-NEXT:    retq
+  %1 = tail call float @llvm.maximum.f32(float %x, float %y)
+  ret float %1
+}
+
+define float @test_fmaximum_nan0(float %x, float %y) {
+; SSE2-LABEL: test_fmaximum_nan0:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fmaximum_nan0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %1 = tail call float @llvm.maximum.f32(float 0x7fff000000000000, float %y)
+  ret float %1
+}
+
+define float @test_fmaximum_nan1(float %x, float %y) {
+; SSE2-LABEL: test_fmaximum_nan1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fmaximum_nan1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %1 = tail call float @llvm.maximum.f32(float %x, float 0x7fff000000000000)
+  ret float %1
+}
+
+define double @test_fmaximum_zero0(double %x, double %y) {
+; SSE2-LABEL: test_fmaximum_zero0:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    cmpunordsd %xmm1, %xmm0
+; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT:    andpd %xmm0, %xmm2
+; SSE2-NEXT:    xorpd %xmm3, %xmm3
+; SSE2-NEXT:    maxsd %xmm3, %xmm1
+; SSE2-NEXT:    andnpd %xmm1, %xmm0
+; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fmaximum_zero0:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fmaximum_zero0:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; AVX512-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
+; AVX512-NEXT:    retq
+  %1 = tail call double @llvm.maximum.f64(double 0.0, double %y)
+  ret double %1
+}
+
+define double @test_fmaximum_zero1(double %x, double %y) {
+; SSE2-LABEL: test_fmaximum_zero1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movapd %xmm0, %xmm1
+; SSE2-NEXT:    cmpunordsd %xmm0, %xmm1
+; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT:    andpd %xmm1, %xmm2
+; SSE2-NEXT:    xorpd %xmm3, %xmm3
+; SSE2-NEXT:    maxsd %xmm3, %xmm0
+; SSE2-NEXT:    andnpd %xmm0, %xmm1
+; SSE2-NEXT:    orpd %xmm2, %xmm1
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fmaximum_zero1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fmaximum_zero1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 {%k1}
+; AVX512-NEXT:    vmovapd %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %1 = tail call double @llvm.maximum.f64(double %x, double 0.0)
+  ret double %1
+}
+
+define double @test_fmaximum_zero2(double %x, double %y) {
+; SSE2-LABEL: test_fmaximum_zero2:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fmaximum_zero2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = tail call double @llvm.maximum.f64(double 0.0, double -0.0)
+  ret double %1
+}
+
+define double @test_fmaximum_const(double %x, double %y) {
+; SSE2-LABEL: test_fmaximum_const:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fmaximum_const:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    retq
+  %1 = tail call double @llvm.maximum.f64(double 42.5, double -42.5)
+  ret double %1
+}
+
+;
+; fminimum
+;
+
+define float @test_fminimum(float %x, float %y) {
+; SSE2-LABEL: test_fminimum:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    je .LBB7_2
+; SSE2-NEXT:  # %bb.1:
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:  .LBB7_2:
+; SSE2-NEXT:    minss %xmm3, %xmm2
+; SSE2-NEXT:    cmpunordss %xmm1, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm3
+; SSE2-NEXT:    andnps %xmm2, %xmm3
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT:    andps %xmm0, %xmm1
+; SSE2-NEXT:    orps %xmm3, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fminimum:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm1, %xmm3
+; AVX1-NEXT:    je .LBB7_2
+; AVX1-NEXT:  # %bb.1:
+; AVX1-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm3
+; AVX1-NEXT:  .LBB7_2:
+; AVX1-NEXT:    vminss %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fminimum:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
+; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    vmovdqa %xmm0, %xmm2
+; AVX512-NEXT:    vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512-NEXT:    vcmpunordss %xmm1, %xmm0, %k2
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT:    vminss %xmm1, %xmm2, %xmm0
+; AVX512-NEXT:    vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k2}
+; AVX512-NEXT:    retq
+  %1 = tail call float @llvm.minimum.f32(float %x, float %y)
+  ret float %1
+}
+
+define float @test_fminimum_nan0(float %x, float %y) {
+; SSE2-LABEL: test_fminimum_nan0:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fminimum_nan0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %1 = tail call float @llvm.minimum.f32(float 0x7fff000000000000, float %y)
+  ret float %1
+}
+
+define float @test_fminimum_nan1(float %x, float %y) {
+; SSE2-LABEL: test_fminimum_nan1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fminimum_nan1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %1 = tail call float @llvm.minimum.f32(float %x, float 0x7fff000000000000)
+  ret float %1
+}
+
+define double @test_fminimum_zero0(double %x, double %y) {
+; SSE2-LABEL: test_fminimum_zero0:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    cmpunordsd %xmm1, %xmm0
+; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT:    andpd %xmm0, %xmm2
+; SSE2-NEXT:    minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT:    andnpd %xmm1, %xmm0
+; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fminimum_zero0:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm0
+; AVX1-NEXT:    vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fminimum_zero0:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; AVX512-NEXT:    vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
+; AVX512-NEXT:    retq
+  %1 = tail call double @llvm.minimum.f64(double -0.0, double %y)
+  ret double %1
+}
+
+define double @test_fminimum_zero1(double %x, double %y) {
+; SSE2-LABEL: test_fminimum_zero1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movapd %xmm0, %xmm1
+; SSE2-NEXT:    cmpunordsd %xmm0, %xmm1
+; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT:    andpd %xmm1, %xmm2
+; SSE2-NEXT:    minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    andnpd %xmm0, %xmm1
+; SSE2-NEXT:    orpd %xmm2, %xmm1
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX1-LABEL: test_fminimum_zero1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_fminimum_zero1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
+; AVX512-NEXT:    retq
+  %1 = tail call double @llvm.minimum.f64(double %x, double -0.0)
+  ret double %1
+}
+
+define double @test_fminimum_zero2(double %x, double %y) {
+; SSE2-LABEL: test_fminimum_zero2:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fminimum_zero2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    retq
+  %1 = tail call double @llvm.minimum.f64(double -0.0, double 0.0)
+  ret double %1
+}
+
+define double @test_fminimum_const(double %x, double %y) {
+; SSE2-LABEL: test_fminimum_const:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_fminimum_const:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    retq
+  %1 = tail call double @llvm.minimum.f64(double 42.5, double -42.5)
+  ret double %1
+}