Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -543,15 +543,12 @@ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); - // Expand FP immediates into loads from the stack, except for the special - // cases we handle. - addLegalFPImmediate(APFloat(+0.0)); // xorpd - addLegalFPImmediate(APFloat(+0.0f)); // xorps - } else if (UseX87 && X86ScalarSSEf32) { + } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); - addRegisterClass(MVT::f64, &X86::RFP64RegClass); + if (UseX87) + addRegisterClass(MVT::f64, &X86::RFP64RegClass); // Use ANDPS to simulate FABS. setOperationAction(ISD::FABS , MVT::f32, Custom); @@ -559,10 +556,12 @@ // Use XORP to simulate FNEG. setOperationAction(ISD::FNEG , MVT::f32, Custom); - setOperationAction(ISD::UNDEF, MVT::f64, Expand); + if (UseX87) + setOperationAction(ISD::UNDEF, MVT::f64, Expand); // Use ANDPS and ORPS to simulate FCOPYSIGN. - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + if (UseX87) + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod @@ -570,17 +569,12 @@ setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - // Special cases we handle for FP constants. - addLegalFPImmediate(APFloat(+0.0f)); // xorps - addLegalFPImmediate(APFloat(+0.0)); // FLD0 - addLegalFPImmediate(APFloat(+1.0)); // FLD1 - addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS - addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - - // Always expand sin/cos functions even though x87 has an instruction. - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + if (UseX87) { + // Always expand sin/cos functions even though x87 has an instruction. + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + } } else if (UseX87) { // f32 and f64 in x87. // Set up the FP register classes. @@ -596,14 +590,27 @@ setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); } - addLegalFPImmediate(APFloat(+0.0)); // FLD0 - addLegalFPImmediate(APFloat(+1.0)); // FLD1 - addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS - addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - addLegalFPImmediate(APFloat(+0.0f)); // FLD0 - addLegalFPImmediate(APFloat(+1.0f)); // FLD1 - addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS - addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS + } + + // Expand FP32 immediates into loads from the stack, save special cases. + if (isTypeLegal(MVT::f32)) { + if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { + addLegalFPImmediate(APFloat(+0.0f)); // FLD0 + addLegalFPImmediate(APFloat(+1.0f)); // FLD1 + addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS + addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS + } else // SSE + addLegalFPImmediate(APFloat(+0.0f)); // xorps + } + // Expand FP64 immediates into loads from the stack, save special cases. + if (isTypeLegal(MVT::f64)) { + if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { + addLegalFPImmediate(APFloat(+0.0)); // FLD0 + addLegalFPImmediate(APFloat(+1.0)); // FLD1 + addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS + addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS + } else + addLegalFPImmediate(APFloat(+0.0)); // xorpd } // We don't support FMA. @@ -1936,7 +1943,8 @@ if (Subtarget.hasSSE2()) return MVT::v16i8; // TODO: Can SSE1 handle a byte vector? - if (Subtarget.hasSSE1()) + // If we have SSE1 registers we should be able to use them. + if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87())) return MVT::v4f32; } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { Index: llvm/test/CodeGen/X86/pr38738.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/pr38738.ll @@ -0,0 +1,254 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefixes=X64SSE %s +; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+sse,-sse2 %s | FileCheck --check-prefixes=X86SSE %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+sse2,-sse3 %s | FileCheck --check-prefixes=X64SSE2 %s +; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+sse2,-sse3 %s | FileCheck --check-prefixes=X86SSE2 %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - -mattr=-x87,+avx,-avx2 %s | FileCheck --check-prefixes=X64AVX %s +; RUN: llc -mtriple=i686-unknown-linux-gnu -o - -mattr=-x87,+avx,-avx2 %s | FileCheck --check-prefixes=X86AVX %s + + +%struct.params = type { double, double } + +define dso_local i32 @pr38738() { +; X64SSE-LABEL: pr38738: +; X64SSE: # %bb.0: # %entry +; X64SSE-NEXT: xorps %xmm0, %xmm0 +; X64SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64SSE-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64SSE-NEXT: retq +; +; X86SSE-LABEL: pr38738: +; X86SSE: # %bb.0: # %entry +; X86SSE-NEXT: subl $28, %esp +; X86SSE-NEXT: .cfi_def_cfa_offset 32 +; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86SSE-NEXT: movl $0, (%esp) +; X86SSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE-NEXT: addl $28, %esp +; X86SSE-NEXT: .cfi_def_cfa_offset 4 +; X86SSE-NEXT: retl +; +; X64SSE2-LABEL: pr38738: +; X64SSE2: # %bb.0: # %entry +; X64SSE2-NEXT: xorps %xmm0, %xmm0 +; X64SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64SSE2-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64SSE2-NEXT: retq +; +; X86SSE2-LABEL: pr38738: +; X86SSE2: # %bb.0: # %entry +; X86SSE2-NEXT: subl $44, %esp +; X86SSE2-NEXT: .cfi_def_cfa_offset 48 +; X86SSE2-NEXT: xorps %xmm0, %xmm0 +; X86SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; X86SSE2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE2-NEXT: addl $44, %esp +; X86SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86SSE2-NEXT: retl +; +; X64AVX-LABEL: pr38738: +; X64AVX: # %bb.0: # %entry +; X64AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; X64AVX-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64AVX-NEXT: retq +; +; X86AVX-LABEL: pr38738: +; X86AVX: # %bb.0: # %entry +; X86AVX-NEXT: subl $44, %esp +; X86AVX-NEXT: .cfi_def_cfa_offset 48 +; X86AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) +; X86AVX-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86AVX-NEXT: addl $44, %esp +; X86AVX-NEXT: .cfi_def_cfa_offset 4 +; X86AVX-NEXT: retl +entry: + %retval = alloca i32, align 4 + %dlg_sys_param = alloca %struct.params, align 8 + %total_active_bw = alloca float, align 4 + %0 = bitcast %struct.params* %dlg_sys_param to i8* + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 16, i1 false) + store float 0.000000e+00, float* %total_active_bw, align 4 + %1 = load i32, i32* %retval, align 4 + ret i32 %1 +} + +define dso_local void @tryset(i8* nocapture %x) local_unnamed_addr { +; X64SSE-LABEL: tryset: +; X64SSE: # %bb.0: +; X64SSE-NEXT: movq $0, 56(%rdi) +; X64SSE-NEXT: movq $0, 48(%rdi) +; X64SSE-NEXT: movq $0, 40(%rdi) +; X64SSE-NEXT: movq $0, 32(%rdi) +; X64SSE-NEXT: movq $0, 24(%rdi) +; X64SSE-NEXT: movq $0, 16(%rdi) +; X64SSE-NEXT: movq $0, 8(%rdi) +; X64SSE-NEXT: movq $0, (%rdi) +; X64SSE-NEXT: retq +; +; X86SSE-LABEL: tryset: +; X86SSE: # %bb.0: +; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE-NEXT: movl $0, 60(%eax) +; X86SSE-NEXT: movl $0, 56(%eax) +; X86SSE-NEXT: movl $0, 52(%eax) +; X86SSE-NEXT: movl $0, 48(%eax) +; X86SSE-NEXT: movl $0, 44(%eax) +; X86SSE-NEXT: movl $0, 40(%eax) +; X86SSE-NEXT: movl $0, 36(%eax) +; X86SSE-NEXT: movl $0, 32(%eax) +; X86SSE-NEXT: movl $0, 28(%eax) +; X86SSE-NEXT: movl $0, 24(%eax) +; X86SSE-NEXT: movl $0, 20(%eax) +; X86SSE-NEXT: movl $0, 16(%eax) +; X86SSE-NEXT: movl $0, 12(%eax) +; X86SSE-NEXT: movl $0, 8(%eax) +; X86SSE-NEXT: movl $0, 4(%eax) +; X86SSE-NEXT: movl $0, (%eax) +; X86SSE-NEXT: retl +; +; X64SSE2-LABEL: tryset: +; X64SSE2: # %bb.0: +; X64SSE2-NEXT: movq $0, 56(%rdi) +; X64SSE2-NEXT: movq $0, 48(%rdi) +; X64SSE2-NEXT: movq $0, 40(%rdi) +; X64SSE2-NEXT: movq $0, 32(%rdi) +; X64SSE2-NEXT: movq $0, 24(%rdi) +; X64SSE2-NEXT: movq $0, 16(%rdi) +; X64SSE2-NEXT: movq $0, 8(%rdi) +; X64SSE2-NEXT: movq $0, (%rdi) +; X64SSE2-NEXT: retq +; +; X86SSE2-LABEL: tryset: +; X86SSE2: # %bb.0: +; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE2-NEXT: movl $0, 4(%eax) +; X86SSE2-NEXT: movl $0, (%eax) +; X86SSE2-NEXT: movl $0, 12(%eax) +; X86SSE2-NEXT: movl $0, 8(%eax) +; X86SSE2-NEXT: movl $0, 20(%eax) +; X86SSE2-NEXT: movl $0, 16(%eax) +; X86SSE2-NEXT: movl $0, 28(%eax) +; X86SSE2-NEXT: movl $0, 24(%eax) +; X86SSE2-NEXT: movl $0, 36(%eax) +; X86SSE2-NEXT: movl $0, 32(%eax) +; X86SSE2-NEXT: movl $0, 44(%eax) +; X86SSE2-NEXT: movl $0, 40(%eax) +; X86SSE2-NEXT: movl $0, 52(%eax) +; X86SSE2-NEXT: movl $0, 48(%eax) +; X86SSE2-NEXT: movl $0, 60(%eax) +; X86SSE2-NEXT: movl $0, 56(%eax) +; X86SSE2-NEXT: retl +; +; X64AVX-LABEL: tryset: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64AVX-NEXT: vmovups %ymm0, 32(%rdi) +; X64AVX-NEXT: vmovups %ymm0, (%rdi) +; X64AVX-NEXT: vzeroupper +; X64AVX-NEXT: retq +; +; X86AVX-LABEL: tryset: +; X86AVX: # %bb.0: +; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86AVX-NEXT: vmovups %ymm0, 32(%eax) +; X86AVX-NEXT: vmovups %ymm0, (%eax) +; X86AVX-NEXT: vzeroupper +; X86AVX-NEXT: retl + tail call void @llvm.memset.p0i8.i64(i8* align 1 %x, i8 0, i64 64, i1 false) + ret void +} + +define dso_local void @trycpy(i8* nocapture %x, i8* nocapture readonly %y) local_unnamed_addr { +; X64SSE-LABEL: trycpy: +; X64SSE: # %bb.0: +; X64SSE-NEXT: movq 24(%rsi), %rax +; X64SSE-NEXT: movq %rax, 24(%rdi) +; X64SSE-NEXT: movq 16(%rsi), %rax +; X64SSE-NEXT: movq %rax, 16(%rdi) +; X64SSE-NEXT: movq (%rsi), %rax +; X64SSE-NEXT: movq 8(%rsi), %rcx +; X64SSE-NEXT: movq %rcx, 8(%rdi) +; X64SSE-NEXT: movq %rax, (%rdi) +; X64SSE-NEXT: retq +; +; X86SSE-LABEL: trycpy: +; X86SSE: # %bb.0: +; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86SSE-NEXT: movl 28(%ecx), %edx +; X86SSE-NEXT: movl %edx, 28(%eax) +; X86SSE-NEXT: movl 24(%ecx), %edx +; X86SSE-NEXT: movl %edx, 24(%eax) +; X86SSE-NEXT: movl 20(%ecx), %edx +; X86SSE-NEXT: movl %edx, 20(%eax) +; X86SSE-NEXT: movl 16(%ecx), %edx +; X86SSE-NEXT: movl %edx, 16(%eax) +; X86SSE-NEXT: movl 12(%ecx), %edx +; X86SSE-NEXT: movl %edx, 12(%eax) +; X86SSE-NEXT: movl 8(%ecx), %edx +; X86SSE-NEXT: movl %edx, 8(%eax) +; X86SSE-NEXT: movl (%ecx), %edx +; X86SSE-NEXT: movl 4(%ecx), %ecx +; X86SSE-NEXT: movl %ecx, 4(%eax) +; X86SSE-NEXT: movl %edx, (%eax) +; X86SSE-NEXT: retl +; +; X64SSE2-LABEL: trycpy: +; X64SSE2: # %bb.0: +; X64SSE2-NEXT: movq 24(%rsi), %rax +; X64SSE2-NEXT: movq %rax, 24(%rdi) +; X64SSE2-NEXT: movq 16(%rsi), %rax +; X64SSE2-NEXT: movq %rax, 16(%rdi) +; X64SSE2-NEXT: movq (%rsi), %rax +; X64SSE2-NEXT: movq 8(%rsi), %rcx +; X64SSE2-NEXT: movq %rcx, 8(%rdi) +; X64SSE2-NEXT: movq %rax, (%rdi) +; X64SSE2-NEXT: retq +; +; X86SSE2-LABEL: trycpy: +; X86SSE2: # %bb.0: +; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86SSE2-NEXT: movsd %xmm0, 24(%eax) +; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86SSE2-NEXT: movsd %xmm0, 16(%eax) +; X86SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86SSE2-NEXT: movsd %xmm1, 8(%eax) +; X86SSE2-NEXT: movsd %xmm0, (%eax) +; X86SSE2-NEXT: retl +; +; X64AVX-LABEL: trycpy: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vmovups (%rsi), %ymm0 +; X64AVX-NEXT: vmovups %ymm0, (%rdi) +; X64AVX-NEXT: vzeroupper +; X64AVX-NEXT: retq +; +; X86AVX-LABEL: trycpy: +; X86AVX: # %bb.0: +; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86AVX-NEXT: vmovups (%ecx), %ymm0 +; X86AVX-NEXT: vmovups %ymm0, (%eax) +; X86AVX-NEXT: vzeroupper +; X86AVX-NEXT: retl + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %x, i8* align 1 %y, i64 32, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2 + Index: llvm/test/CodeGen/X86/x87.ll =================================================================== --- llvm/test/CodeGen/X86/x87.ll +++ llvm/test/CodeGen/X86/x87.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefix=X87 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s -check-prefix=X87 -; RUN: llc < %s -mtriple=i686-- -mattr=-x87 | FileCheck %s -check-prefix=NOX87 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse | FileCheck %s -check-prefix=NOX87 -; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse | FileCheck %s -check-prefix=NOX87 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse2 | FileCheck %s -check-prefix=NOX87 +; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefixes=X8732,X87 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-sse | FileCheck %s -check-prefixes=X8732,X87 +; RUN: llc < %s -mtriple=i686-- -mattr=-x87 | FileCheck %s -check-prefixes=NOX8732,NOX87 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse | FileCheck %s -check-prefixes=NOX8732,NOX87 +; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse | FileCheck %s -check-prefixes=NOX8732,NOX87 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,-sse2 | FileCheck %s -check-prefixes=X8732_SSE,NOX87 define void @test(i32 %i, i64 %l, float* %pf, double* %pd, fp128* %pld) nounwind readnone { ; X87-LABEL: test: @@ -12,18 +12,18 @@ ; NOX87-NOT: {{ }}f{{.*}} ; X87: fild -; NOX87: __floatunsisf +; NOX8732: __floatunsisf %tmp = uitofp i32 %i to float -; X87: fild -; NOX87: __floatdisf +; X8732: fild +; NOX8732: __floatdisf %tmp1 = sitofp i64 %l to float -; X87: fadd -; NOX87: __addsf3 +; X8732: fadd +; NOX8732: __addsf3 %tmp2 = fadd float %tmp, %tmp1 -; X87: fstp +; X8732: fstp store float %tmp2, float* %pf ; X87: fild