Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30266,10 +30266,12 @@
   SDLoc DL(Op);
   uint64_t SizeInBits = VT.getScalarSizeInBits();
   APInt PreferredZero = APInt::getZero(SizeInBits);
+  APInt OppositeZero = PreferredZero;
   EVT IVT = VT.changeTypeToInteger();
   X86ISD::NodeType MinMaxOp;
   if (Op.getOpcode() == ISD::FMAXIMUM) {
     MinMaxOp = X86ISD::FMAX;
+    OppositeZero.setSignBit();
   } else {
     PreferredZero.setSignBit();
     MinMaxOp = X86ISD::FMIN;
@@ -30294,7 +30296,7 @@
   // We check if any of operands is NaN and return NaN. Then we check if any of
   // operands is zero or negative zero (for fmaximum and fminimum respectively)
   // to ensure the correct zero is returned.
-  auto IsPreferredZero = [PreferredZero](SDValue Op) {
+  auto MatchesZero = [](SDValue Op, APInt PreferredZero) {
     Op = peekThroughBitcasts(Op);
     if (auto *CstOp = dyn_cast<ConstantFPSDNode>(Op))
       return CstOp->getValueAPF().bitcastToAPInt() == PreferredZero;
@@ -30323,11 +30325,12 @@
                           DAG.isKnownNeverZeroFloat(X) ||
                           DAG.isKnownNeverZeroFloat(Y);
   SDValue NewX, NewY;
-  if (IgnoreSignedZero || IsPreferredZero(Y)) {
+  if (IgnoreSignedZero || MatchesZero(Y, PreferredZero) ||
+      MatchesZero(X, OppositeZero)) {
     // Operands are already in right order or order does not matter.
     NewX = X;
     NewY = Y;
-  } else if (IsPreferredZero(X)) {
+  } else if (MatchesZero(X, PreferredZero) || MatchesZero(Y, OppositeZero)) {
     NewX = Y;
     NewY = X;
   } else if (!VT.isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) &&
Index: llvm/test/CodeGen/X86/fminimum-fmaximum.ll
===================================================================
--- llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -1042,42 +1042,21 @@
 define <2 x double> @test_fminimum_vector_zero(<2 x double> %x) {
 ; SSE2-LABEL: test_fminimum_vector_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movaps %xmm0, %xmm1
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    movaps %xmm0, %xmm1
-; SSE2-NEXT:    andps %xmm2, %xmm1
-; SSE2-NEXT:    andnps %xmm0, %xmm2
-; SSE2-NEXT:    movaps %xmm2, %xmm3
-; SSE2-NEXT:    minpd %xmm1, %xmm3
-; SSE2-NEXT:    movaps %xmm2, %xmm0
-; SSE2-NEXT:    cmpunordpd %xmm2, %xmm0
-; SSE2-NEXT:    andpd %xmm0, %xmm2
-; SSE2-NEXT:    andnpd %xmm3, %xmm0
-; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    xorpd %xmm1, %xmm1
+; SSE2-NEXT:    minpd %xmm0, %xmm1
+; SSE2-NEXT:    movapd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_fminimum_vector_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vpandn %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vminpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X86-LABEL: test_fminimum_vector_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
-; X86-NEXT:    vpandn %xmm0, %xmm1, %xmm0
-; X86-NEXT:    vminpd %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vminpd %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    retl
   %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> <double 0., double 0.>)
   ret <2 x double> %r
@@ -1086,54 +1065,27 @@
 define <4 x float> @test_fmaximum_vector_signed_zero(<4 x float> %x) {
 ; SSE2-LABEL: test_fmaximum_vector_signed_zero:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    pxor %xmm3, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm4
-; SSE2-NEXT:    pandn %xmm0, %xmm4
-; SSE2-NEXT:    por %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm3
-; SSE2-NEXT:    por %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    maxps %xmm4, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    cmpunordps %xmm0, %xmm2
-; SSE2-NEXT:    andps %xmm2, %xmm0
-; SSE2-NEXT:    andnps %xmm1, %xmm2
-; SSE2-NEXT:    orps %xmm2, %xmm0
+; SSE2-NEXT:    movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; SSE2-NEXT:    maxps %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: test_fmaximum_vector_signed_zero:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; AVX1-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: test_fmaximum_vector_signed_zero:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX512-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; AVX512-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; X86-LABEL: test_fmaximum_vector_signed_zero:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vmovaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; X86-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; X86-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; X86-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    retl
   %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> <float -0., float -0., float -0., float -0.>)
   ret <4 x float> %r
@@ -1265,42 +1217,21 @@
 define <2 x double> @test_fminimum_vector_zero_first(<2 x double> %x) {
 ; SSE2-LABEL: test_fminimum_vector_zero_first:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movaps %xmm0, %xmm1
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    movaps %xmm0, %xmm1
-; SSE2-NEXT:    andps %xmm2, %xmm1
-; SSE2-NEXT:    andnps %xmm0, %xmm2
-; SSE2-NEXT:    movaps %xmm2, %xmm3
-; SSE2-NEXT:    minpd %xmm1, %xmm3
-; SSE2-NEXT:    movaps %xmm2, %xmm0
-; SSE2-NEXT:    cmpunordpd %xmm2, %xmm0
-; SSE2-NEXT:    andpd %xmm0, %xmm2
-; SSE2-NEXT:    andnpd %xmm3, %xmm0
-; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    xorpd %xmm1, %xmm1
+; SSE2-NEXT:    minpd %xmm0, %xmm1
+; SSE2-NEXT:    movapd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: test_fminimum_vector_zero_first:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm2
-; AVX-NEXT:    vpandn %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vminpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X86-LABEL: test_fminimum_vector_zero_first:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
-; X86-NEXT:    vpandn %xmm0, %xmm1, %xmm0
-; X86-NEXT:    vminpd %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vminpd %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    retl
   %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0., double 0.>, <2 x double> %x)
   ret <2 x double> %r
@@ -1338,54 +1269,27 @@
 define <4 x float> @test_fmaximum_vector_signed_zero_first(<4 x float> %x) {
 ; SSE2-LABEL: test_fmaximum_vector_signed_zero_first:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    pxor %xmm3, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm4
-; SSE2-NEXT:    pandn %xmm0, %xmm4
-; SSE2-NEXT:    por %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm3
-; SSE2-NEXT:    por %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    maxps %xmm4, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    cmpunordps %xmm0, %xmm2
-; SSE2-NEXT:    andps %xmm2, %xmm0
-; SSE2-NEXT:    andnps %xmm1, %xmm2
-; SSE2-NEXT:    orps %xmm2, %xmm0
+; SSE2-NEXT:    movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; SSE2-NEXT:    maxps %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: test_fmaximum_vector_signed_zero_first:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; AVX1-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: test_fmaximum_vector_signed_zero_first:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX512-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; AVX512-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; X86-LABEL: test_fmaximum_vector_signed_zero_first:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vmovaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; X86-NEXT:    vblendvps %xmm0, %xmm1, %xmm0, %xmm2
-; X86-NEXT:    vblendvps %xmm0, %xmm0, %xmm1, %xmm0
-; X86-NEXT:    vmaxps %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vmaxps %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    retl
   %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> <float -0., float -0., float -0., float -0.>, <4 x float> %x)
   ret <4 x float> %r
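Note (not part of the patch): a minimal standalone C++ sketch of why the simpler lowering is sound. Per lane, x86 MINPD computes "a < b ? a : b", returning the second operand whenever the comparison is false, which covers both NaN inputs and the +0.0 vs -0.0 tie. Placing the preferred zero in the first operand therefore makes the bare instruction match IEEE-754 minimum semantics (NaN propagates, -0.0 is less than +0.0), so the cmpunord/blendv fixups removed above become unnecessary; fmaximum with -0.0 is the symmetric case. The helpers x86_min and ieee_minimum below are illustrative names, not LLVM APIs.

// Sketch: MINPD per-lane semantics vs. IEEE-754 minimum when the known
// zero constant is placed in the first operand position.
#include <cassert>
#include <cmath>
#include <limits>

// Per-lane behavior of MINPD: the second operand is returned whenever
// "a < b" is false, including unordered (NaN) compares and zero ties.
static double x86_min(double a, double b) { return a < b ? a : b; }

// IEEE-754 minimum as implemented by llvm.minimum: NaN is propagated and
// -0.0 is treated as smaller than +0.0.
static double ieee_minimum(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return std::numeric_limits<double>::quiet_NaN();
  if (a == 0.0 && b == 0.0)
    return std::signbit(a) ? a : b; // prefer -0.0 over +0.0
  return a < b ? a : b;
}

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double Tests[] = {-1.5, -0.0, 0.0, 2.5, NaN};
  for (double X : Tests) {
    double Want = ieee_minimum(X, 0.0);
    double Got = x86_min(/*first operand=*/0.0, /*second=*/X);
    // Compare bit-level semantics: NaN matches NaN, and -0.0 != +0.0.
    assert(std::isnan(Want) ? std::isnan(Got)
                            : Want == Got &&
                              std::signbit(Want) == std::signbit(Got));
  }
  return 0;
}

The asserts pass for every sample input, including the -0.0 and NaN lanes that the old lowering special-cased with extra blends.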