diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21238,7 +21238,10 @@ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() && !IsStrict) return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); - if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80) + // The transform for i32->f64/f32 isn't correct for 0 when rounding to + // negative infinity, so disable it under strictfp and use FILD instead. + if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80 && + !IsStrict) return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && (DstVT == MVT::f32 || DstVT == MVT::f64)) diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2434,15 +2434,16 @@ ; ; X86-SSE-LABEL: uifdi: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: subl $12, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 16 -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: movsd %xmm0, (%esp) -; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: subl $20, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: fildll (%esp) +; X86-SSE-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-SSE-NEXT: wait -; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: addl $20, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl ; @@ -2655,16 +2656,16 @@ ; ; X86-SSE-LABEL: uiffi: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: pushl %eax -; X86-SSE-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero -; X86-SSE-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 -; X86-SSE-NEXT: movss %xmm0, (%esp) -; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: subl $20, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) +; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-SSE-NEXT: wait -; X86-SSE-NEXT: popl %eax +; X86-SSE-NEXT: addl $20, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -485,17 +485,23 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; SSE-X86-LABEL: uitofp_i32tof32: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: pushl %ebp ; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-X86-NEXT: cvtsd2ss %xmm0, %xmm0 -; SSE-X86-NEXT: movss %xmm0, (%esp) -; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $16, %esp +; SSE-X86-NEXT: movl 8(%ebp), %eax +; SSE-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; SSE-X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-X86-NEXT: flds {{[0-9]+}}(%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: popl %eax -; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; 
SSE-X86-NEXT: .cfi_def_cfa %esp, 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i32tof32: @@ -506,17 +512,23 @@ ; ; AVX1-X86-LABEL: uitofp_i32tof32: ; AVX1-X86: # %bb.0: -; AVX1-X86-NEXT: pushl %eax +; AVX1-X86-NEXT: pushl %ebp ; AVX1-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX1-X86-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; AVX1-X86-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 -; AVX1-X86-NEXT: vmovss %xmm0, (%esp) -; AVX1-X86-NEXT: flds (%esp) +; AVX1-X86-NEXT: .cfi_offset %ebp, -8 +; AVX1-X86-NEXT: movl %esp, %ebp +; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX1-X86-NEXT: andl $-8, %esp +; AVX1-X86-NEXT: subl $16, %esp +; AVX1-X86-NEXT: movl 8(%ebp), %eax +; AVX1-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: fildll {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: fstps {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: flds {{[0-9]+}}(%esp) ; AVX1-X86-NEXT: wait -; AVX1-X86-NEXT: popl %eax -; AVX1-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX1-X86-NEXT: movl %ebp, %esp +; AVX1-X86-NEXT: popl %ebp +; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4 ; AVX1-X86-NEXT: retl ; ; AVX1-X64-LABEL: uitofp_i32tof32: @@ -1162,12 +1174,13 @@ ; SSE-X86-NEXT: movl %esp, %ebp ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp ; SSE-X86-NEXT: andl $-8, %esp -; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-X86-NEXT: movsd %xmm0, (%esp) -; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: subl $16, %esp +; SSE-X86-NEXT: movl 8(%ebp), %eax +; SSE-X86-NEXT: movl %eax, (%esp) +; SSE-X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fildll (%esp) +; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp) ; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp @@ 
-1188,12 +1201,13 @@ ; AVX1-X86-NEXT: movl %esp, %ebp ; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp ; AVX1-X86-NEXT: andl $-8, %esp -; AVX1-X86-NEXT: subl $8, %esp -; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX1-X86-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; AVX1-X86-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) -; AVX1-X86-NEXT: fldl (%esp) +; AVX1-X86-NEXT: subl $16, %esp +; AVX1-X86-NEXT: movl 8(%ebp), %eax +; AVX1-X86-NEXT: movl %eax, (%esp) +; AVX1-X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: fildll (%esp) +; AVX1-X86-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX1-X86-NEXT: fldl {{[0-9]+}}(%esp) ; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: movl %ebp, %esp ; AVX1-X86-NEXT: popl %ebp