diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6064,28 +6064,28 @@ // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on 
maximum range of FP_TO_SINT: // True = fp_to_sint(Src) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19047,8 +19047,9 @@ // of a signed i64. Let Thresh be the FP equivalent of // 0x8000000000000000ULL. // - // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; - // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); + // Adjust = (Value < Thresh) ? 0 : 0x80000000; + // FltOfs = (Value < Thresh) ? 0 : Thresh; + // FistSrc = (Value - FltOfs); // Fist-to-mem64 FistSrc // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent // to XOR'ing the high 32 bits with Adjust. @@ -19056,7 +19057,6 @@ // Being a power of 2, Thresh is exactly representable in all FP formats. // For X87 we'd like to use the smallest FP type for this constant, but // for DAG type consistency we have to match the FP operand type. - // FIXME: This code generates a spurious inexact exception for 1.0. 
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; @@ -19082,18 +19082,16 @@ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(APInt::getSignMask(64), DL, MVT::i64)); - SDValue Sub; + SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, + DAG.getConstantFP(0.0, DL, TheVT), + ThreshVal); + if (IsStrict) { - Sub = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other}, - { Chain, Value, ThreshVal }); - Chain = Sub.getValue(1); + Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other}, + { Chain, Value, FltOfs }); + Chain = Value.getValue(1); } else - Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); - - Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), - *DAG.getContext(), TheVT), - Value, ThreshVal, ISD::SETLT); - Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); + Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs); } MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI); diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll @@ -20,12 +20,15 @@ ; CHECK-NEXT: larl %r1, .LCPI0_0 ; CHECK-NEXT: le %f1, 0(%r1) ; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: jnl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sebr %f0, %f1 -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: lzer %f1 +; CHECK-NEXT: j .LBB0_3 ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -41,12 +44,15 @@ ; CHECK-NEXT: larl %r1, .LCPI1_0 ; CHECK-NEXT: ldeb %f1, 0(%r1) ; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB1_2 +; CHECK-NEXT: jnl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sdbr %f0, %f1 
-; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: lzdr %f1 +; CHECK-NEXT: j .LBB1_3 ; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -64,12 +70,15 @@ ; CHECK-NEXT: larl %r1, .LCPI2_0 ; CHECK-NEXT: lxeb %f1, 0(%r1) ; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: jnl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sxbr %f0, %f1 -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: lzxr %f1 +; CHECK-NEXT: j .LBB2_3 ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll --- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll @@ -19,12 +19,15 @@ ; CHECK-NEXT: larl %r1, .LCPI0_0 ; CHECK-NEXT: le %f1, 0(%r1) ; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: jnl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sebr %f0, %f1 -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: lzer %f1 +; CHECK-NEXT: j .LBB0_3 ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cgebr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -40,12 +43,15 @@ ; CHECK-NEXT: larl %r1, .LCPI1_0 ; CHECK-NEXT: ldeb %f1, 0(%r1) ; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB1_2 +; CHECK-NEXT: jnl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sdbr %f0, %f1 -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: lzdr %f1 +; CHECK-NEXT: j .LBB1_3 ; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: llihh %r0, 32768 +; 
CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cgdbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -63,12 +69,15 @@ ; CHECK-NEXT: larl %r1, .LCPI2_0 ; CHECK-NEXT: lxeb %f1, 0(%r1) ; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: jnl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sxbr %f0, %f1 -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: lzxr %f1 +; CHECK-NEXT: j .LBB2_3 ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cgxbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/X86/fp-cvt.ll b/llvm/test/CodeGen/X86/fp-cvt.ll --- a/llvm/test/CodeGen/X86/fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-cvt.ll @@ -444,20 +444,21 @@ ; X86-NEXT: subl $16, %esp ; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: flds {{\.LCPI.*}} -; X86-NEXT: fld %st(1) -; X86-NEXT: fsub %st(1), %st -; X86-NEXT: fxch %st(1) -; X86-NEXT: fucomp %st(2) +; X86-NEXT: fucom %st(1) ; X86-NEXT: fnstsw %ax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf +; X86-NEXT: setbe %al +; X86-NEXT: fldz ; X86-NEXT: ja .LBB10_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB10_2: -; X86-NEXT: fstp %st(0) -; X86-NEXT: setbe %al +; X86-NEXT: fstp %st(1) +; X86-NEXT: fsubrp %st, %st(1) ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -465,7 +466,7 @@ ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movzbl %al, %edx +; X86-NEXT: movb %al, %dl ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -477,14 +478,14 @@ ; X64-X87: # %bb.0: ; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-X87-NEXT: flds 
{{.*}}(%rip) -; X64-X87-NEXT: fld %st(1) -; X64-X87-NEXT: fsub %st(1), %st ; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fucomi %st(1), %st +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: fldz ; X64-X87-NEXT: fxch %st(1) -; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) -; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: fsubrp %st, %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -500,13 +501,13 @@ ; X64-SSSE3: # %bb.0: ; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: flds {{.*}}(%rip) -; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fsub %st(1), %st ; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fucomi %st(1), %st +; X64-SSSE3-NEXT: fldz ; X64-SSSE3-NEXT: fxch %st(1) -; X64-SSSE3-NEXT: fucompi %st(2), %st ; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) +; X64-SSSE3-NEXT: fsubrp %st, %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: setbe %al ; X64-SSSE3-NEXT: shlq $63, %rax @@ -526,20 +527,21 @@ ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: fldt (%eax) ; X86-NEXT: flds {{\.LCPI.*}} -; X86-NEXT: fld %st(1) -; X86-NEXT: fsub %st(1), %st -; X86-NEXT: fxch %st(1) -; X86-NEXT: fucomp %st(2) +; X86-NEXT: fucom %st(1) ; X86-NEXT: fnstsw %ax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf +; X86-NEXT: setbe %al +; X86-NEXT: fldz ; X86-NEXT: ja .LBB11_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB11_2: -; X86-NEXT: fstp %st(0) -; X86-NEXT: setbe %al +; X86-NEXT: fstp %st(1) +; X86-NEXT: fsubrp %st, %st(1) ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -547,7 +549,7 @@ ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; 
X86-NEXT: movzbl %al, %edx +; X86-NEXT: movb %al, %dl ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -559,14 +561,14 @@ ; X64-X87: # %bb.0: ; X64-X87-NEXT: fldt (%rdi) ; X64-X87-NEXT: flds {{.*}}(%rip) -; X64-X87-NEXT: fld %st(1) -; X64-X87-NEXT: fsub %st(1), %st ; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fucomi %st(1), %st +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: fldz ; X64-X87-NEXT: fxch %st(1) -; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) -; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: fsubrp %st, %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -582,13 +584,13 @@ ; X64-SSSE3: # %bb.0: ; X64-SSSE3-NEXT: fldt (%rdi) ; X64-SSSE3-NEXT: flds {{.*}}(%rip) -; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fsub %st(1), %st ; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fucomi %st(1), %st +; X64-SSSE3-NEXT: fldz ; X64-SSSE3-NEXT: fxch %st(1) -; X64-SSSE3-NEXT: fucompi %st(2), %st ; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) +; X64-SSSE3-NEXT: fsubrp %st, %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: setbe %al ; X64-SSSE3-NEXT: shlq $63, %rax diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -1278,18 +1278,15 @@ ; X86-SSE: # %bb.0: # %entry ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE-NEXT: xorl %ecx, %ecx +; X86-SSE-NEXT: ucomisd %xmm0, %xmm1 +; X86-SSE-NEXT: setbe %cl +; X86-SSE-NEXT: shll $31, %ecx ; X86-SSE-NEXT: movapd %xmm0, %xmm2 ; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2 -; X86-SSE-NEXT: movapd %xmm2, %xmm3 -; X86-SSE-NEXT: andpd %xmm0, %xmm2 -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: ucomisd 
%xmm0, %xmm1 -; X86-SSE-NEXT: subsd %xmm1, %xmm0 -; X86-SSE-NEXT: andnpd %xmm0, %xmm3 -; X86-SSE-NEXT: orpd %xmm3, %xmm2 -; X86-SSE-NEXT: cvttsd2si %xmm2, %ecx -; X86-SSE-NEXT: setbe %al -; X86-SSE-NEXT: shll $31, %eax +; X86-SSE-NEXT: andnpd %xmm1, %xmm2 +; X86-SSE-NEXT: subsd %xmm2, %xmm0 +; X86-SSE-NEXT: cvttsd2si %xmm0, %eax ; X86-SSE-NEXT: xorl %ecx, %eax ; X86-SSE-NEXT: retl ; @@ -1326,14 +1323,14 @@ ; X87-NEXT: .cfi_def_cfa_offset 24 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: flds {{\.LCPI.*}} -; X87-NEXT: fld %st(1) -; X87-NEXT: fsub %st(1), %st ; X87-NEXT: xorl %edx, %edx +; X87-NEXT: fucomi %st(1), %st +; X87-NEXT: setbe %dl +; X87-NEXT: fldz ; X87-NEXT: fxch %st(1) -; X87-NEXT: fucompi %st(2), %st ; X87-NEXT: fcmovnbe %st(1), %st ; X87-NEXT: fstp %st(1) -; X87-NEXT: setbe %dl +; X87-NEXT: fsubrp %st, %st(1) ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1355,14 +1352,11 @@ ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE-NEXT: movapd %xmm0, %xmm2 -; X86-SSE-NEXT: subsd %xmm1, %xmm2 +; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2 +; X86-SSE-NEXT: andnpd %xmm1, %xmm2 ; X86-SSE-NEXT: movapd %xmm0, %xmm3 -; X86-SSE-NEXT: cmpltsd %xmm1, %xmm3 -; X86-SSE-NEXT: movapd %xmm3, %xmm4 -; X86-SSE-NEXT: andnpd %xmm2, %xmm4 -; X86-SSE-NEXT: andpd %xmm0, %xmm3 -; X86-SSE-NEXT: orpd %xmm4, %xmm3 -; X86-SSE-NEXT: movlpd %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: subsd %xmm2, %xmm3 +; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -1384,32 +1378,29 @@ ; SSE-LABEL: f20u64: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx ; SSE-NEXT: movapd %xmm0, %xmm2 ; SSE-NEXT: cmpltsd %xmm1, %xmm2 -; 
SSE-NEXT: movapd %xmm2, %xmm3 -; SSE-NEXT: andpd %xmm0, %xmm2 -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: ucomisd %xmm1, %xmm0 -; SSE-NEXT: subsd %xmm1, %xmm0 -; SSE-NEXT: andnpd %xmm0, %xmm3 -; SSE-NEXT: orpd %xmm3, %xmm2 -; SSE-NEXT: cvttsd2si %xmm2, %rcx -; SSE-NEXT: setae %al -; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: subsd %xmm2, %xmm0 +; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: f20u64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm2 -; AVX1-NEXT: vcvttsd2si %xmm2, %rcx -; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomisd %xmm1, %xmm0 -; AVX1-NEXT: setae %al -; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vandnpd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rax ; AVX1-NEXT: xorq %rcx, %rax ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -89,13 +89,14 @@ ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} -; AVX512F_32_WIN-NEXT: vmovss %xmm2, (%esp) -; AVX512F_32_WIN-NEXT: flds (%esp) -; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx ; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512F_32_WIN-NEXT: vsubss %xmm1, 
%xmm0, %xmm0 +; AVX512F_32_WIN-NEXT: vmovss %xmm0, (%esp) +; AVX512F_32_WIN-NEXT: flds (%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) ; AVX512F_32_WIN-NEXT: setbe %dl ; AVX512F_32_WIN-NEXT: shll $31, %edx ; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -110,13 +111,14 @@ ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} -; AVX512F_32_LIN-NEXT: vmovss %xmm2, (%esp) -; AVX512F_32_LIN-NEXT: flds (%esp) -; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx ; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} +; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F_32_LIN-NEXT: vmovss %xmm0, (%esp) +; AVX512F_32_LIN-NEXT: flds (%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) ; AVX512F_32_LIN-NEXT: setbe %dl ; AVX512F_32_LIN-NEXT: shll $31, %edx ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -133,14 +135,11 @@ ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE3_32_WIN-NEXT: movaps %xmm0, %xmm2 -; SSE3_32_WIN-NEXT: cmpltss %xmm1, %xmm2 -; SSE3_32_WIN-NEXT: movaps %xmm2, %xmm3 -; SSE3_32_WIN-NEXT: andps %xmm0, %xmm2 ; SSE3_32_WIN-NEXT: xorl %edx, %edx ; SSE3_32_WIN-NEXT: ucomiss %xmm0, %xmm1 -; SSE3_32_WIN-NEXT: subss %xmm1, %xmm0 -; SSE3_32_WIN-NEXT: andnps %xmm0, %xmm3 -; SSE3_32_WIN-NEXT: orps %xmm3, %xmm2 +; SSE3_32_WIN-NEXT: cmpltss %xmm1, %xmm0 +; SSE3_32_WIN-NEXT: andnps %xmm1, %xmm0 +; SSE3_32_WIN-NEXT: subss %xmm0, %xmm2 ; SSE3_32_WIN-NEXT: movss %xmm2, (%esp) ; SSE3_32_WIN-NEXT: flds (%esp) ; SSE3_32_WIN-NEXT: fisttpll (%esp) @@ -158,14 +157,11 @@ ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE3_32_LIN-NEXT: movaps %xmm0, %xmm2 -; SSE3_32_LIN-NEXT: cmpltss %xmm1, %xmm2 -; SSE3_32_LIN-NEXT: movaps %xmm2, %xmm3 -; SSE3_32_LIN-NEXT: andps %xmm0, %xmm2 ; SSE3_32_LIN-NEXT: xorl %edx, %edx ; SSE3_32_LIN-NEXT: ucomiss %xmm0, %xmm1 -; SSE3_32_LIN-NEXT: subss %xmm1, %xmm0 -; SSE3_32_LIN-NEXT: andnps %xmm0, %xmm3 -; SSE3_32_LIN-NEXT: orps %xmm3, %xmm2 +; SSE3_32_LIN-NEXT: cmpltss %xmm1, %xmm0 +; SSE3_32_LIN-NEXT: andnps %xmm1, %xmm0 +; SSE3_32_LIN-NEXT: subss %xmm0, %xmm2 ; SSE3_32_LIN-NEXT: movss %xmm2, (%esp) ; SSE3_32_LIN-NEXT: flds (%esp) ; SSE3_32_LIN-NEXT: fisttpll (%esp) @@ -198,13 +194,10 @@ ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2_32_WIN-NEXT: movaps %xmm0, %xmm2 -; SSE2_32_WIN-NEXT: subss %xmm1, %xmm2 +; SSE2_32_WIN-NEXT: cmpltss %xmm1, %xmm2 +; SSE2_32_WIN-NEXT: andnps %xmm1, %xmm2 ; SSE2_32_WIN-NEXT: movaps %xmm0, %xmm3 -; SSE2_32_WIN-NEXT: cmpltss %xmm1, %xmm3 -; SSE2_32_WIN-NEXT: movaps %xmm3, %xmm4 -; SSE2_32_WIN-NEXT: andnps %xmm2, %xmm4 -; SSE2_32_WIN-NEXT: andps %xmm0, %xmm3 -; SSE2_32_WIN-NEXT: orps %xmm4, %xmm3 +; SSE2_32_WIN-NEXT: subss %xmm2, %xmm3 ; SSE2_32_WIN-NEXT: movss %xmm3, {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: flds {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -230,13 +223,10 @@ ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2_32_LIN-NEXT: movaps %xmm0, %xmm2 -; SSE2_32_LIN-NEXT: subss %xmm1, %xmm2 +; SSE2_32_LIN-NEXT: cmpltss %xmm1, %xmm2 +; SSE2_32_LIN-NEXT: andnps %xmm1, %xmm2 ; SSE2_32_LIN-NEXT: movaps %xmm0, %xmm3 -; SSE2_32_LIN-NEXT: cmpltss %xmm1, %xmm3 -; SSE2_32_LIN-NEXT: movaps %xmm3, %xmm4 -; SSE2_32_LIN-NEXT: andnps %xmm2, %xmm4 -; SSE2_32_LIN-NEXT: andps %xmm0, %xmm3 -; SSE2_32_LIN-NEXT: orps %xmm4, %xmm3 +; SSE2_32_LIN-NEXT: subss %xmm2, 
%xmm3 ; SSE2_32_LIN-NEXT: movss %xmm3, {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: flds {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -276,20 +266,21 @@ ; X87_WIN-NEXT: subl $16, %esp ; X87_WIN-NEXT: flds 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 -; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1), %st -; X87_WIN-NEXT: fxch %st(1) -; X87_WIN-NEXT: fucomp %st(2) +; X87_WIN-NEXT: fucom %st(1) ; X87_WIN-NEXT: fnstsw %ax +; X87_WIN-NEXT: xorl %edx, %edx ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_WIN-NEXT: sahf +; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: ja LBB0_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB0_2: -; X87_WIN-NEXT: fstp %st(0) -; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fsubrp %st, %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -297,7 +288,7 @@ ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzbl %al, %edx +; X87_WIN-NEXT: movb %al, %dl ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -310,20 +301,21 @@ ; X87_LIN-NEXT: subl $20, %esp ; X87_LIN-NEXT: flds {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} -; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1), %st -; X87_LIN-NEXT: fxch %st(1) -; X87_LIN-NEXT: fucomp %st(2) +; X87_LIN-NEXT: fucom %st(1) ; X87_LIN-NEXT: fnstsw %ax +; X87_LIN-NEXT: xorl %edx, %edx ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf +; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: ja .LBB0_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: 
.LBB0_2: -; X87_LIN-NEXT: fstp %st(0) -; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fsubrp %st, %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -331,7 +323,7 @@ ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzbl %al, %edx +; X87_LIN-NEXT: movb %al, %dl ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -579,13 +571,14 @@ ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} -; AVX512F_32_WIN-NEXT: vmovsd %xmm2, (%esp) -; AVX512F_32_WIN-NEXT: fldl (%esp) -; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx ; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} +; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; AVX512F_32_WIN-NEXT: vmovsd %xmm0, (%esp) +; AVX512F_32_WIN-NEXT: fldl (%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) ; AVX512F_32_WIN-NEXT: setbe %dl ; AVX512F_32_WIN-NEXT: shll $31, %edx ; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -600,13 +593,14 @@ ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} -; AVX512F_32_LIN-NEXT: vmovsd %xmm2, (%esp) -; AVX512F_32_LIN-NEXT: fldl (%esp) -; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx ; 
AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1} +; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; AVX512F_32_LIN-NEXT: vmovsd %xmm0, (%esp) +; AVX512F_32_LIN-NEXT: fldl (%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) ; AVX512F_32_LIN-NEXT: setbe %dl ; AVX512F_32_LIN-NEXT: shll $31, %edx ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -623,15 +617,12 @@ ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE3_32_WIN-NEXT: movapd %xmm0, %xmm2 -; SSE3_32_WIN-NEXT: cmpltsd %xmm1, %xmm2 -; SSE3_32_WIN-NEXT: movapd %xmm2, %xmm3 -; SSE3_32_WIN-NEXT: andpd %xmm0, %xmm2 ; SSE3_32_WIN-NEXT: xorl %edx, %edx ; SSE3_32_WIN-NEXT: ucomisd %xmm0, %xmm1 -; SSE3_32_WIN-NEXT: subsd %xmm1, %xmm0 -; SSE3_32_WIN-NEXT: andnpd %xmm0, %xmm3 -; SSE3_32_WIN-NEXT: orpd %xmm3, %xmm2 -; SSE3_32_WIN-NEXT: movlpd %xmm2, (%esp) +; SSE3_32_WIN-NEXT: cmpltsd %xmm1, %xmm0 +; SSE3_32_WIN-NEXT: andnpd %xmm1, %xmm0 +; SSE3_32_WIN-NEXT: subsd %xmm0, %xmm2 +; SSE3_32_WIN-NEXT: movsd %xmm2, (%esp) ; SSE3_32_WIN-NEXT: fldl (%esp) ; SSE3_32_WIN-NEXT: fisttpll (%esp) ; SSE3_32_WIN-NEXT: setbe %dl @@ -648,15 +639,12 @@ ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE3_32_LIN-NEXT: movapd %xmm0, %xmm2 -; SSE3_32_LIN-NEXT: cmpltsd %xmm1, %xmm2 -; SSE3_32_LIN-NEXT: movapd %xmm2, %xmm3 -; SSE3_32_LIN-NEXT: andpd %xmm0, %xmm2 ; SSE3_32_LIN-NEXT: xorl %edx, %edx ; SSE3_32_LIN-NEXT: ucomisd %xmm0, %xmm1 -; SSE3_32_LIN-NEXT: subsd %xmm1, %xmm0 -; SSE3_32_LIN-NEXT: andnpd %xmm0, %xmm3 -; SSE3_32_LIN-NEXT: orpd %xmm3, %xmm2 -; SSE3_32_LIN-NEXT: movlpd %xmm2, (%esp) +; SSE3_32_LIN-NEXT: cmpltsd %xmm1, %xmm0 +; SSE3_32_LIN-NEXT: andnpd %xmm1, %xmm0 +; SSE3_32_LIN-NEXT: subsd %xmm0, %xmm2 +; SSE3_32_LIN-NEXT: movsd %xmm2, (%esp) ; SSE3_32_LIN-NEXT: fldl (%esp) ; SSE3_32_LIN-NEXT: fisttpll (%esp) ; SSE3_32_LIN-NEXT: setbe %dl @@ -688,14 
+676,11 @@ ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE2_32_WIN-NEXT: movapd %xmm0, %xmm2 -; SSE2_32_WIN-NEXT: subsd %xmm1, %xmm2 +; SSE2_32_WIN-NEXT: cmpltsd %xmm1, %xmm2 +; SSE2_32_WIN-NEXT: andnpd %xmm1, %xmm2 ; SSE2_32_WIN-NEXT: movapd %xmm0, %xmm3 -; SSE2_32_WIN-NEXT: cmpltsd %xmm1, %xmm3 -; SSE2_32_WIN-NEXT: movapd %xmm3, %xmm4 -; SSE2_32_WIN-NEXT: andnpd %xmm2, %xmm4 -; SSE2_32_WIN-NEXT: andpd %xmm0, %xmm3 -; SSE2_32_WIN-NEXT: orpd %xmm4, %xmm3 -; SSE2_32_WIN-NEXT: movlpd %xmm3, {{[0-9]+}}(%esp) +; SSE2_32_WIN-NEXT: subsd %xmm2, %xmm3 +; SSE2_32_WIN-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -720,14 +705,11 @@ ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE2_32_LIN-NEXT: movapd %xmm0, %xmm2 -; SSE2_32_LIN-NEXT: subsd %xmm1, %xmm2 +; SSE2_32_LIN-NEXT: cmpltsd %xmm1, %xmm2 +; SSE2_32_LIN-NEXT: andnpd %xmm1, %xmm2 ; SSE2_32_LIN-NEXT: movapd %xmm0, %xmm3 -; SSE2_32_LIN-NEXT: cmpltsd %xmm1, %xmm3 -; SSE2_32_LIN-NEXT: movapd %xmm3, %xmm4 -; SSE2_32_LIN-NEXT: andnpd %xmm2, %xmm4 -; SSE2_32_LIN-NEXT: andpd %xmm0, %xmm3 -; SSE2_32_LIN-NEXT: orpd %xmm4, %xmm3 -; SSE2_32_LIN-NEXT: movlpd %xmm3, {{[0-9]+}}(%esp) +; SSE2_32_LIN-NEXT: subsd %xmm2, %xmm3 +; SSE2_32_LIN-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -766,20 +748,21 @@ ; X87_WIN-NEXT: subl $16, %esp ; X87_WIN-NEXT: fldl 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 -; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1), %st -; X87_WIN-NEXT: fxch %st(1) -; X87_WIN-NEXT: fucomp %st(2) +; X87_WIN-NEXT: fucom %st(1) ; X87_WIN-NEXT: fnstsw %ax +; X87_WIN-NEXT: xorl %edx, %edx ; X87_WIN-NEXT: # kill: def $ah killed $ah killed 
$ax ; X87_WIN-NEXT: sahf +; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: ja LBB2_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB2_2: -; X87_WIN-NEXT: fstp %st(0) -; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fsubrp %st, %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -787,7 +770,7 @@ ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzbl %al, %edx +; X87_WIN-NEXT: movb %al, %dl ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -800,20 +783,21 @@ ; X87_LIN-NEXT: subl $20, %esp ; X87_LIN-NEXT: fldl {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} -; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1), %st -; X87_LIN-NEXT: fxch %st(1) -; X87_LIN-NEXT: fucomp %st(2) +; X87_LIN-NEXT: fucom %st(1) ; X87_LIN-NEXT: fnstsw %ax +; X87_LIN-NEXT: xorl %edx, %edx ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf +; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: ja .LBB2_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: .LBB2_2: -; X87_LIN-NEXT: fstp %st(0) -; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fsubrp %st, %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -821,7 +805,7 @@ ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzbl %al, %edx +; X87_LIN-NEXT: movb %al, %dl ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl 
{{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1027,13 +1011,13 @@ ; AVX512_32_WIN-NEXT: subl $8, %esp ; AVX512_32_WIN-NEXT: fldt 8(%ebp) ; AVX512_32_WIN-NEXT: flds __real@5f000000 -; AVX512_32_WIN-NEXT: fld %st(1) -; AVX512_32_WIN-NEXT: fsub %st(1), %st ; AVX512_32_WIN-NEXT: xorl %edx, %edx +; AVX512_32_WIN-NEXT: fucomi %st(1), %st +; AVX512_32_WIN-NEXT: fldz ; AVX512_32_WIN-NEXT: fxch %st(1) -; AVX512_32_WIN-NEXT: fucompi %st(2), %st ; AVX512_32_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_WIN-NEXT: fstp %st(1) +; AVX512_32_WIN-NEXT: fsubrp %st, %st(1) ; AVX512_32_WIN-NEXT: fisttpll (%esp) ; AVX512_32_WIN-NEXT: setbe %dl ; AVX512_32_WIN-NEXT: shll $31, %edx @@ -1048,13 +1032,13 @@ ; AVX512_32_LIN-NEXT: subl $12, %esp ; AVX512_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; AVX512_32_LIN-NEXT: flds {{\.LCPI.*}} -; AVX512_32_LIN-NEXT: fld %st(1) -; AVX512_32_LIN-NEXT: fsub %st(1), %st ; AVX512_32_LIN-NEXT: xorl %edx, %edx +; AVX512_32_LIN-NEXT: fucomi %st(1), %st +; AVX512_32_LIN-NEXT: fldz ; AVX512_32_LIN-NEXT: fxch %st(1) -; AVX512_32_LIN-NEXT: fucompi %st(2), %st ; AVX512_32_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_LIN-NEXT: fstp %st(1) +; AVX512_32_LIN-NEXT: fsubrp %st, %st(1) ; AVX512_32_LIN-NEXT: fisttpll (%esp) ; AVX512_32_LIN-NEXT: setbe %dl ; AVX512_32_LIN-NEXT: shll $31, %edx @@ -1068,13 +1052,13 @@ ; AVX512_64_WIN-NEXT: pushq %rax ; AVX512_64_WIN-NEXT: fldt (%rcx) ; AVX512_64_WIN-NEXT: flds __real@{{.*}}(%rip) -; AVX512_64_WIN-NEXT: fld %st(1) -; AVX512_64_WIN-NEXT: fsub %st(1), %st ; AVX512_64_WIN-NEXT: xorl %eax, %eax +; AVX512_64_WIN-NEXT: fucomi %st(1), %st +; AVX512_64_WIN-NEXT: fldz ; AVX512_64_WIN-NEXT: fxch %st(1) -; AVX512_64_WIN-NEXT: fucompi %st(2), %st ; AVX512_64_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_WIN-NEXT: fstp %st(1) +; AVX512_64_WIN-NEXT: fsubrp %st, %st(1) ; AVX512_64_WIN-NEXT: fisttpll (%rsp) ; AVX512_64_WIN-NEXT: setbe %al ; AVX512_64_WIN-NEXT: shlq $63, %rax @@ -1086,13 +1070,13 @@ ; AVX512_64_LIN: # %bb.0: ; 
AVX512_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; AVX512_64_LIN-NEXT: flds {{.*}}(%rip) -; AVX512_64_LIN-NEXT: fld %st(1) -; AVX512_64_LIN-NEXT: fsub %st(1), %st ; AVX512_64_LIN-NEXT: xorl %eax, %eax +; AVX512_64_LIN-NEXT: fucomi %st(1), %st +; AVX512_64_LIN-NEXT: fldz ; AVX512_64_LIN-NEXT: fxch %st(1) -; AVX512_64_LIN-NEXT: fucompi %st(2), %st ; AVX512_64_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_LIN-NEXT: fstp %st(1) +; AVX512_64_LIN-NEXT: fsubrp %st, %st(1) ; AVX512_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; AVX512_64_LIN-NEXT: setbe %al ; AVX512_64_LIN-NEXT: shlq $63, %rax @@ -1107,13 +1091,13 @@ ; SSE3_32_WIN-NEXT: subl $8, %esp ; SSE3_32_WIN-NEXT: fldt 8(%ebp) ; SSE3_32_WIN-NEXT: flds __real@5f000000 -; SSE3_32_WIN-NEXT: fld %st(1) -; SSE3_32_WIN-NEXT: fsub %st(1), %st ; SSE3_32_WIN-NEXT: xorl %edx, %edx +; SSE3_32_WIN-NEXT: fucomi %st(1), %st +; SSE3_32_WIN-NEXT: fldz ; SSE3_32_WIN-NEXT: fxch %st(1) -; SSE3_32_WIN-NEXT: fucompi %st(2), %st ; SSE3_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_WIN-NEXT: fstp %st(1) +; SSE3_32_WIN-NEXT: fsubrp %st, %st(1) ; SSE3_32_WIN-NEXT: fisttpll (%esp) ; SSE3_32_WIN-NEXT: setbe %dl ; SSE3_32_WIN-NEXT: shll $31, %edx @@ -1128,13 +1112,13 @@ ; SSE3_32_LIN-NEXT: subl $12, %esp ; SSE3_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; SSE3_32_LIN-NEXT: flds {{\.LCPI.*}} -; SSE3_32_LIN-NEXT: fld %st(1) -; SSE3_32_LIN-NEXT: fsub %st(1), %st ; SSE3_32_LIN-NEXT: xorl %edx, %edx +; SSE3_32_LIN-NEXT: fucomi %st(1), %st +; SSE3_32_LIN-NEXT: fldz ; SSE3_32_LIN-NEXT: fxch %st(1) -; SSE3_32_LIN-NEXT: fucompi %st(2), %st ; SSE3_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_LIN-NEXT: fstp %st(1) +; SSE3_32_LIN-NEXT: fsubrp %st, %st(1) ; SSE3_32_LIN-NEXT: fisttpll (%esp) ; SSE3_32_LIN-NEXT: setbe %dl ; SSE3_32_LIN-NEXT: shll $31, %edx @@ -1148,13 +1132,13 @@ ; SSE3_64_WIN-NEXT: pushq %rax ; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) -; SSE3_64_WIN-NEXT: fld %st(1) -; SSE3_64_WIN-NEXT: fsub %st(1), %st ; SSE3_64_WIN-NEXT: xorl %eax, 
%eax +; SSE3_64_WIN-NEXT: fucomi %st(1), %st +; SSE3_64_WIN-NEXT: fldz ; SSE3_64_WIN-NEXT: fxch %st(1) -; SSE3_64_WIN-NEXT: fucompi %st(2), %st ; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_WIN-NEXT: fstp %st(1) +; SSE3_64_WIN-NEXT: fsubrp %st, %st(1) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) ; SSE3_64_WIN-NEXT: setbe %al ; SSE3_64_WIN-NEXT: shlq $63, %rax @@ -1166,13 +1150,13 @@ ; SSE3_64_LIN: # %bb.0: ; SSE3_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; SSE3_64_LIN-NEXT: flds {{.*}}(%rip) -; SSE3_64_LIN-NEXT: fld %st(1) -; SSE3_64_LIN-NEXT: fsub %st(1), %st ; SSE3_64_LIN-NEXT: xorl %eax, %eax +; SSE3_64_LIN-NEXT: fucomi %st(1), %st +; SSE3_64_LIN-NEXT: fldz ; SSE3_64_LIN-NEXT: fxch %st(1) -; SSE3_64_LIN-NEXT: fucompi %st(2), %st ; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_LIN-NEXT: fstp %st(1) +; SSE3_64_LIN-NEXT: fsubrp %st, %st(1) ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; SSE3_64_LIN-NEXT: setbe %al ; SSE3_64_LIN-NEXT: shlq $63, %rax @@ -1187,14 +1171,14 @@ ; SSE2_32_WIN-NEXT: subl $16, %esp ; SSE2_32_WIN-NEXT: fldt 8(%ebp) ; SSE2_32_WIN-NEXT: flds __real@5f000000 -; SSE2_32_WIN-NEXT: fld %st(1) -; SSE2_32_WIN-NEXT: fsub %st(1), %st ; SSE2_32_WIN-NEXT: xorl %edx, %edx +; SSE2_32_WIN-NEXT: fucomi %st(1), %st +; SSE2_32_WIN-NEXT: setbe %dl +; SSE2_32_WIN-NEXT: fldz ; SSE2_32_WIN-NEXT: fxch %st(1) -; SSE2_32_WIN-NEXT: fucompi %st(2), %st ; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_WIN-NEXT: fstp %st(1) -; SSE2_32_WIN-NEXT: setbe %dl +; SSE2_32_WIN-NEXT: fsubrp %st, %st(1) ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1214,14 +1198,14 @@ ; SSE2_32_LIN-NEXT: subl $20, %esp ; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}} -; SSE2_32_LIN-NEXT: fld %st(1) -; SSE2_32_LIN-NEXT: fsub %st(1), %st ; SSE2_32_LIN-NEXT: xorl %edx, %edx +; SSE2_32_LIN-NEXT: fucomi %st(1), %st +; SSE2_32_LIN-NEXT: setbe %dl +; SSE2_32_LIN-NEXT: fldz ; 
SSE2_32_LIN-NEXT: fxch %st(1) -; SSE2_32_LIN-NEXT: fucompi %st(2), %st ; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_LIN-NEXT: fstp %st(1) -; SSE2_32_LIN-NEXT: setbe %dl +; SSE2_32_LIN-NEXT: fsubrp %st, %st(1) ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1240,14 +1224,14 @@ ; SSE2_64_WIN-NEXT: subq $16, %rsp ; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) -; SSE2_64_WIN-NEXT: fld %st(1) -; SSE2_64_WIN-NEXT: fsub %st(1), %st ; SSE2_64_WIN-NEXT: xorl %eax, %eax +; SSE2_64_WIN-NEXT: fucomi %st(1), %st +; SSE2_64_WIN-NEXT: setbe %al +; SSE2_64_WIN-NEXT: fldz ; SSE2_64_WIN-NEXT: fxch %st(1) -; SSE2_64_WIN-NEXT: fucompi %st(2), %st ; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_WIN-NEXT: fstp %st(1) -; SSE2_64_WIN-NEXT: setbe %al +; SSE2_64_WIN-NEXT: fsubrp %st, %st(1) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx ; SSE2_64_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1264,14 +1248,14 @@ ; SSE2_64_LIN: # %bb.0: ; SSE2_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) -; SSE2_64_LIN-NEXT: fld %st(1) -; SSE2_64_LIN-NEXT: fsub %st(1), %st ; SSE2_64_LIN-NEXT: xorl %eax, %eax +; SSE2_64_LIN-NEXT: fucomi %st(1), %st +; SSE2_64_LIN-NEXT: setbe %al +; SSE2_64_LIN-NEXT: fldz ; SSE2_64_LIN-NEXT: fxch %st(1) -; SSE2_64_LIN-NEXT: fucompi %st(2), %st ; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_LIN-NEXT: fstp %st(1) -; SSE2_64_LIN-NEXT: setbe %al +; SSE2_64_LIN-NEXT: fsubrp %st, %st(1) ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; SSE2_64_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1291,20 +1275,21 @@ ; X87_WIN-NEXT: subl $16, %esp ; X87_WIN-NEXT: fldt 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 -; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1), %st -; X87_WIN-NEXT: fxch %st(1) -; X87_WIN-NEXT: fucomp %st(2) +; 
X87_WIN-NEXT: fucom %st(1) ; X87_WIN-NEXT: fnstsw %ax +; X87_WIN-NEXT: xorl %edx, %edx ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_WIN-NEXT: sahf +; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fldz ; X87_WIN-NEXT: ja LBB4_2 ; X87_WIN-NEXT: # %bb.1: -; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fstp %st(0) ; X87_WIN-NEXT: fldz +; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: LBB4_2: -; X87_WIN-NEXT: fstp %st(0) -; X87_WIN-NEXT: setbe %al +; X87_WIN-NEXT: fstp %st(1) +; X87_WIN-NEXT: fsubrp %st, %st(1) ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1312,7 +1297,7 @@ ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87_WIN-NEXT: movzbl %al, %edx +; X87_WIN-NEXT: movb %al, %dl ; X87_WIN-NEXT: shll $31, %edx ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1325,20 +1310,21 @@ ; X87_LIN-NEXT: subl $20, %esp ; X87_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} -; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1), %st -; X87_LIN-NEXT: fxch %st(1) -; X87_LIN-NEXT: fucomp %st(2) +; X87_LIN-NEXT: fucom %st(1) ; X87_LIN-NEXT: fnstsw %ax +; X87_LIN-NEXT: xorl %edx, %edx ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87_LIN-NEXT: sahf +; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fldz ; X87_LIN-NEXT: ja .LBB4_2 ; X87_LIN-NEXT: # %bb.1: -; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fstp %st(0) ; X87_LIN-NEXT: fldz +; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: .LBB4_2: -; X87_LIN-NEXT: fstp %st(0) -; X87_LIN-NEXT: setbe %al +; X87_LIN-NEXT: fstp %st(1) +; X87_LIN-NEXT: fsubrp %st, %st(1) ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1346,7 +1332,7 @@ ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87_LIN-NEXT: fldcw 
{{[0-9]+}}(%esp) -; X87_LIN-NEXT: movzbl %al, %edx +; X87_LIN-NEXT: movb %al, %dl ; X87_LIN-NEXT: shll $31, %edx ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4483,12 +4483,18 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rax ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32: @@ -4505,20 +4511,30 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax ; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32: ; AVX1: # %bb.0: # 
%entry -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttss2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32: @@ -4539,21 +4555,35 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rdx +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rcx ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttss2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32: @@ -4578,31 +4608,49 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm3 +; CHECK-NEXT: cvttss2si %xmm3, %rax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), 
%rax -; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttss2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v4i64_v4f32: @@ -4773,12 +4821,18 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rax ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64: @@ -4795,20 +4849,30 @@ define <2 x i64> 
@constrained_vector_fptoui_v2i64_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax ; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttsd2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f64: @@ -4829,21 +4893,35 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rdx +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 
+; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rcx ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttsd2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64: @@ -4868,31 +4946,49 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movsd {{.*#+}} xmm2 = 
mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm3 +; CHECK-NEXT: cvttsd2si %xmm3, %rax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vcvttsd2si %xmm1, %rax ; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rax ; AVX1-NEXT: vmovq %rax, %xmm2 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; AVX1-NEXT: vsubsd %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v4i64_v4f64: