Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16848,17 +16848,40 @@ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { // Allow either -0.0 or 0.0 if (CFP->isZero()) { + // These patterns should produce -0.0 on one of the zero inputs. Check + // for nsz before folding. // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs - if ((CC == ISD::SETGE || CC == ISD::SETGT) && - N0 == N2 && N3.getOpcode() == ISD::FNEG && - N2 == N3.getOperand(0)) + if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && + N3.getOpcode() == ISD::FNEG && + N3.getNode()->getFlags().hasNoSignedZeros() && N2 == N3.getOperand(0)) return DAG.getNode(ISD::FABS, DL, VT, N0); // select (setl[te] X, +/-0.0), fneg(X), X -> fabs - if ((CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::FNEG && - N2.getOperand(0) == N3) + if ((CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && + N2.getOpcode() == ISD::FNEG && + N2.getNode()->getFlags().hasNoSignedZeros() && N2.getOperand(0) == N3) return DAG.getNode(ISD::FABS, DL, VT, N3); + + // These patterns are sign-of-zero compliant, but the zero values must be + // routed to the (0.0 - X) option, so only 2 out of 4 conditions are + // supported. 
+ // select (setgt X, +/-0.0), X, (fsub 0.0, X) -> fabs + if (CC == ISD::SETGT && N0 == N2 && N3.getOpcode() == ISD::FSUB && + N0 == N3.getOperand(1)) { + if (auto *CZ = dyn_cast<ConstantFPSDNode>(N3.getOperand(0))) { + if (CZ->isZero() && !CZ->isNegative()) + return DAG.getNode(ISD::FABS, DL, VT, N0); + } + } + + // select (setle X, +/-0.0), (fsub 0.0, X), X -> fabs + if (CC == ISD::SETLE && N0 == N3 && N2.getOpcode() == ISD::FSUB && + N0 == N2.getOperand(1)) { + if (auto *CZ = dyn_cast<ConstantFPSDNode>(N2.getOperand(0))) { + if (CZ->isZero() && !CZ->isNegative()) + return DAG.getNode(ISD::FABS, DL, VT, N3); + } + } } } Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -257,7 +257,7 @@ ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } - return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { @@ -1339,8 +1339,8 @@ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); } } @@ -2669,7 +2669,7 @@ // Unary op widening. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2562,8 +2562,11 @@ if (isa(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); + SDNodeFlags FMF = SDNodeFlags(); + if (cast(&I)->hasNoSignedZeros()) + FMF.setNoSignedZeros(true); setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); + Op2.getValueType(), Op2, FMF)); return; } Index: test/CodeGen/X86/fabs.ll =================================================================== --- test/CodeGen/X86/fabs.ll +++ test/CodeGen/X86/fabs.ll @@ -52,7 +52,20 @@ ; X87UNSAFE-LABEL: test2: ; X87UNSAFE: # %bb.0: ; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) -; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fldz +; X87UNSAFE-NEXT: fchs +; X87UNSAFE-NEXT: fucomp %st(1) +; X87UNSAFE-NEXT: fnstsw %ax +; X87UNSAFE-NEXT: # kill: def $ah killed $ah killed $ax +; X87UNSAFE-NEXT: sahf +; X87UNSAFE-NEXT: fld %st(0) +; X87UNSAFE-NEXT: fchs +; X87UNSAFE-NEXT: jbe .LBB1_2 +; X87UNSAFE-NEXT: # %bb.1: +; X87UNSAFE-NEXT: fstp %st(1) +; X87UNSAFE-NEXT: fldz +; X87UNSAFE-NEXT: .LBB1_2: +; X87UNSAFE-NEXT: fstp %st(0) ; X87UNSAFE-NEXT: retl ; ; X64-LABEL: test2: @@ -96,3 +109,426 @@ ret x86_fp80 %Y } +define double @test4(double %X) { +; CHECK-LABEL: test4: +; CHECK: ## %bb.0: +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: ## kill: def %ah killed %ah killed 
%ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fsubr %st(0), %st(1) +; CHECK-NEXT: ja LBB3_2 +; CHECK-NEXT: ## %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: LBB3_2: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: retl +; +; UNSAFE-LABEL: test4: +; UNSAFE: ## %bb.0: +; UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; UNSAFE-NEXT: fabs +; UNSAFE-NEXT: retl +; +; NOOPT-LABEL: test4: +; NOOPT: ## %bb.0: +; NOOPT-NEXT: xorps %xmm1, %xmm1 +; NOOPT-NEXT: movaps %xmm1, %xmm2 +; NOOPT-NEXT: subsd %xmm0, %xmm2 +; NOOPT-NEXT: cmpltsd %xmm0, %xmm1 +; NOOPT-NEXT: movaps %xmm1, %xmm3 +; NOOPT-NEXT: andpd %xmm0, %xmm3 +; NOOPT-NEXT: andnpd %xmm2, %xmm1 +; NOOPT-NEXT: orpd %xmm3, %xmm1 +; NOOPT-NEXT: movaps %xmm1, %xmm0 +; NOOPT-NEXT: retq +; X87-LABEL: test4: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldz +; X87-NEXT: fxch %st(1) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: fsubr %st(0), %st(1) +; X87-NEXT: ja .LBB3_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: fstp %st(0) +; X87-NEXT: fldz +; X87-NEXT: fxch %st(1) +; X87-NEXT: .LBB3_2: +; X87-NEXT: fstp %st(1) +; X87-NEXT: retl +; +; X87UNSAFE-LABEL: test4: +; X87UNSAFE: # %bb.0: +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: retl +; +; X64-LABEL: test4: +; X64: # %bb.0: +; X64-NEXT: xorpd %xmm1, %xmm1 +; X64-NEXT: xorpd %xmm2, %xmm2 +; X64-NEXT: subsd %xmm0, %xmm2 +; X64-NEXT: cmpltsd %xmm0, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm0, %xmm1 +; X64-NEXT: orpd %xmm3, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: retq + %Y = fcmp ogt double %X, zeroinitializer + %Z = fsub double zeroinitializer, %X + %Q = select i1 %Y, double %X, double %Z + ret double %Q +} + +define double @test5(double %X) { +; CHECK-LABEL: test5: +; CHECK: ## %bb.0: +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fldz +; CHECK-NEXT: fchs 
+; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: ## kill: def %ah killed %ah killed %ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fchs +; CHECK-NEXT: jae LBB4_2 +; CHECK-NEXT: ## %bb.1: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: LBB4_2: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: retl +; +; UNSAFE-LABEL: test5: +; UNSAFE: ## %bb.0: +; UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; UNSAFE-NEXT: fabs +; UNSAFE-NEXT: retl +; +; NOOPT-LABEL: test5: +; NOOPT: ## %bb.0: +; NOOPT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NOOPT-NEXT: movabsq $-9223372036854775808, %rax ## imm = 0x8000000000000000 +; NOOPT-NEXT: movq %xmm0, %rcx +; NOOPT-NEXT: xorq %rax, %rcx +; NOOPT-NEXT: movq %rcx, %xmm2 +; NOOPT-NEXT: cmplesd %xmm0, %xmm1 +; NOOPT-NEXT: movaps %xmm1, %xmm3 +; NOOPT-NEXT: andpd %xmm0, %xmm3 +; NOOPT-NEXT: andnpd %xmm2, %xmm1 +; NOOPT-NEXT: orpd %xmm3, %xmm1 +; NOOPT-NEXT: movaps %xmm1, %xmm0 +; NOOPT-NEXT: retq +; X87-LABEL: test5: +; X87: # %bb.0: +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldz +; X87-NEXT: fchs +; X87-NEXT: fxch %st(1) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fstp %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: fld %st(0) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB4_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: fstp %st(1) +; X87-NEXT: fldz +; X87-NEXT: .LBB4_2: +; X87-NEXT: fstp %st(0) +; X87-NEXT: retl +; +; X87UNSAFE-LABEL: test5: +; X87UNSAFE: # %bb.0: +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: retl +; +; X64-LABEL: test5: +; X64: # %bb.0: +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movapd %xmm1, %xmm2 +; X64-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; X64-NEXT: xorpd %xmm0, %xmm2 +; X64-NEXT: cmplesd %xmm0, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm0, %xmm1 +; X64-NEXT: orpd 
%xmm3, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: retq + %Y = fcmp oge double %X, -0.0 + %Z = fsub nsz double -0.0, %X + %Q = select i1 %Y, double %X, double %Z + ret double %Q +} + +define <4 x double> @test6(<4 x double> %X) { +; X87-LABEL: test6: +; X87: # %bb.0: +; X87-NEXT: pushl %ebx +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: pushl %esi +; X87-NEXT: .cfi_def_cfa_offset 12 +; X87-NEXT: .cfi_offset %esi, -12 +; X87-NEXT: .cfi_offset %ebx, -8 +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldz +; X87-NEXT: fchs +; X87-NEXT: fxch %st(1) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %ecx +; X87-NEXT: fxch %st(2) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %edx +; X87-NEXT: fxch %st(3) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %ebx +; X87-NEXT: fxch %st(4) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fstp %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: fld %st(0) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB5_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: fstp %st(1) +; X87-NEXT: fldz +; X87-NEXT: .LBB5_2: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movb %bh, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(3) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB5_4 +; X87-NEXT: # %bb.3: +; X87-NEXT: fstp %st(4) +; X87-NEXT: fldz +; X87-NEXT: .LBB5_4: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: movb %dh, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(2) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB5_6 +; X87-NEXT: # %bb.5: +; X87-NEXT: fstp %st(3) +; X87-NEXT: fldz +; X87-NEXT: .LBB5_6: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movb %ch, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(1) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB5_8 +; X87-NEXT: # %bb.7: +; X87-NEXT: fstp %st(2) +; X87-NEXT: fldz +; X87-NEXT: .LBB5_8: +; 
X87-NEXT: fstp %st(0) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 24(%esi) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 16(%esi) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 8(%esi) +; X87-NEXT: fstpl (%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: popl %esi +; X87-NEXT: popl %ebx +; X87-NEXT: retl $4 +; +; X87UNSAFE-LABEL: test6: +; X87UNSAFE: # %bb.0: +; X87UNSAFE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(1) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(2) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(3) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fstpl 24(%eax) +; X87UNSAFE-NEXT: fxch %st(2) +; X87UNSAFE-NEXT: fstpl 16(%eax) +; X87UNSAFE-NEXT: fstpl 8(%eax) +; X87UNSAFE-NEXT: fstpl (%eax) +; X87UNSAFE-NEXT: retl $4 +; +; X64-LABEL: test6: +; X64: # %bb.0: +; X64-NEXT: movapd {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00] +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: cmplepd %xmm1, %xmm3 +; X64-NEXT: movapd %xmm2, %xmm4 +; X64-NEXT: cmplepd %xmm0, %xmm4 +; X64-NEXT: movapd %xmm1, %xmm5 +; X64-NEXT: xorpd %xmm2, %xmm5 +; X64-NEXT: xorpd %xmm0, %xmm2 +; X64-NEXT: andpd %xmm4, %xmm0 +; X64-NEXT: andnpd %xmm2, %xmm4 +; X64-NEXT: orpd %xmm4, %xmm0 +; X64-NEXT: andpd %xmm3, %xmm1 +; X64-NEXT: andnpd %xmm5, %xmm3 +; X64-NEXT: orpd %xmm3, %xmm1 +; X64-NEXT: retq + %Y = fcmp oge <4 x double> %X, <double -0.0, double -0.0, double -0.0, double -0.0> + %Z = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %X + %Q = select <4 x i1> %Y, <4 x double> %X, <4 x double> %Z + ret <4 x double> %Q +} + +define <4 x double> @test7(<4 x double> %X) { +; X87-LABEL: test7: +; X87: # %bb.0: +; X87-NEXT: pushl %ebx +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: pushl %esi +; X87-NEXT: .cfi_def_cfa_offset 12 +; X87-NEXT: .cfi_offset %esi, -12 +; X87-NEXT: .cfi_offset %ebx, -8 +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; 
X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fldz +; X87-NEXT: fxch %st(1) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %ecx +; X87-NEXT: fxch %st(2) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %edx +; X87-NEXT: fxch %st(3) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: movl %eax, %ebx +; X87-NEXT: fxch %st(4) +; X87-NEXT: fucom %st(1) +; X87-NEXT: fstp %st(1) +; X87-NEXT: fnstsw %ax +; X87-NEXT: # kill: def $ah killed $ah killed $ax +; X87-NEXT: sahf +; X87-NEXT: fld %st(0) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB6_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: fstp %st(1) +; X87-NEXT: fldz +; X87-NEXT: .LBB6_2: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movb %bh, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(3) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB6_4 +; X87-NEXT: # %bb.3: +; X87-NEXT: fstp %st(4) +; X87-NEXT: fldz +; X87-NEXT: .LBB6_4: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: movb %dh, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(2) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB6_6 +; X87-NEXT: # %bb.5: +; X87-NEXT: fstp %st(3) +; X87-NEXT: fldz +; X87-NEXT: .LBB6_6: +; X87-NEXT: fstp %st(0) +; X87-NEXT: movb %ch, %ah +; X87-NEXT: sahf +; X87-NEXT: fld %st(1) +; X87-NEXT: fchs +; X87-NEXT: jae .LBB6_8 +; X87-NEXT: # %bb.7: +; X87-NEXT: fstp %st(2) +; X87-NEXT: fldz +; X87-NEXT: .LBB6_8: +; X87-NEXT: fstp %st(0) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 24(%esi) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 16(%esi) +; X87-NEXT: fxch %st(1) +; X87-NEXT: fstpl 8(%esi) +; X87-NEXT: fstpl (%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: popl %esi +; X87-NEXT: popl %ebx +; X87-NEXT: retl $4 +; +; X87UNSAFE-LABEL: test7: +; X87UNSAFE: # %bb.0: +; X87UNSAFE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl {{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fldl 
{{[0-9]+}}(%esp) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(1) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(2) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fxch %st(3) +; X87UNSAFE-NEXT: fabs +; X87UNSAFE-NEXT: fstpl 24(%eax) +; X87UNSAFE-NEXT: fxch %st(2) +; X87UNSAFE-NEXT: fstpl 16(%eax) +; X87UNSAFE-NEXT: fstpl 8(%eax) +; X87UNSAFE-NEXT: fstpl (%eax) +; X87UNSAFE-NEXT: retl $4 +; +; X64-LABEL: test7: +; X64: # %bb.0: +; X64-NEXT: xorpd %xmm2, %xmm2 +; X64-NEXT: xorpd %xmm3, %xmm3 +; X64-NEXT: cmplepd %xmm1, %xmm3 +; X64-NEXT: cmplepd %xmm0, %xmm2 +; X64-NEXT: movapd {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00] +; X64-NEXT: movapd %xmm1, %xmm5 +; X64-NEXT: xorpd %xmm4, %xmm5 +; X64-NEXT: xorpd %xmm0, %xmm4 +; X64-NEXT: andpd %xmm2, %xmm0 +; X64-NEXT: andnpd %xmm4, %xmm2 +; X64-NEXT: orpd %xmm2, %xmm0 +; X64-NEXT: andpd %xmm3, %xmm1 +; X64-NEXT: andnpd %xmm5, %xmm3 +; X64-NEXT: orpd %xmm3, %xmm1 +; X64-NEXT: retq + %Y = fcmp oge <4 x double> %X, zeroinitializer + %Z = fsub nsz <4 x double> zeroinitializer, %X + %Q = select <4 x i1> %Y, <4 x double> %X, <4 x double> %Z + ret <4 x double> %Q +}