diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -346,6 +346,10 @@
   // The X86-Win64 calling convention always returns __m64 values in RAX.
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,

+  // GCC returns FP values in RAX on Win64.
+  CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+  CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
   // Otherwise, everything is the same as 'normal' X86-64 C CC.
   CCDelegateTo<RetCC_X86_64_C>
 ]>;
@@ -613,7 +617,6 @@
   // 128 bit vectors are passed by pointer
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,

-
   // 256 bit vectors are passed by pointer
   CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
@@ -626,6 +629,16 @@
   // The first 4 MMX vector arguments are passed in GPRs.
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,

+  // If SSE was disabled, pass FP values smaller than 64-bits as integers in
+  // GPRs or on the stack.
+  CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+  CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
+  // The first 4 FP/Vector arguments are passed in XMM registers.
+  CCIfType<[f32, f64],
+           CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+                                   [RCX , RDX , R8  , R9  ]>>,
+
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i8 ], CCAssignToRegWithShadow<[CL  , DL  , R8B , R9B ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
@@ -643,11 +656,6 @@
   CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8  , R9  ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,

-  // The first 4 FP/Vector arguments are passed in XMM registers.
-  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-           CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
-                                   [RCX , RDX , R8  , R9  ]>>,
-
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
   CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2693,18 +2693,16 @@
     assert(VA.getLocInfo() != CCValAssign::FPExt &&
            "Unexpected FP-extend for return value.");

-    // If this is x86-64, and we disabled SSE, we can't return FP values,
-    // or SSE or MMX vectors.
-    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
-         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
-        (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
+    // Report an error if we have attempted to return a value via an XMM
+    // register and SSE was disabled.
+    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
-    } else if (ValVT == MVT::f64 &&
-               (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
-      // Likewise we can't return F64 values with SSE1 only. gcc does so, but
-      // llvm-gcc has never done it right and no one has noticed, so this
-      // should be OK for now.
+    } else if (!Subtarget.hasSSE2() &&
+               X86::FR64XRegClass.contains(VA.getLocReg()) &&
+               ValVT == MVT::f64) {
+      // When returning a double via an XMM register, report an error if SSE2
+      // is not enabled.
       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
     }
@@ -2999,7 +2997,6 @@
   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
-  bool Is64Bit = Subtarget.is64Bit();
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -3018,16 +3015,17 @@
         RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
     }

-    // If this is x86-64, and we disabled SSE, we can't return FP values
-    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
-        ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+    // Report an error if there was an attempt to return FP values via XMM
+    // registers.
+    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
       if (VA.getLocReg() == X86::XMM1)
         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
       else
         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
-    } else if (CopyVT == MVT::f64 &&
-               (Is64Bit && !Subtarget.hasSSE2())) {
+    } else if (!Subtarget.hasSSE2() &&
+               X86::FR64XRegClass.contains(VA.getLocReg()) &&
+               CopyVT == MVT::f64) {
       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
       if (VA.getLocReg() == X86::XMM1)
         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
@@ -3074,6 +3072,9 @@
       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
     }

+    if (VA.getLocInfo() == CCValAssign::BCvt)
+      Val = DAG.getBitcast(VA.getValVT(), Val);
+
     InVals.push_back(Val);
   }
diff --git a/llvm/test/CodeGen/X86/no-sse-win64.ll b/llvm/test/CodeGen/X86/no-sse-win64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-win64.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-windows-msvc < %s -mattr=-sse | FileCheck %s
+; RUN: llc -mtriple=x86_64-windows-gnu < %s -mattr=-sse | FileCheck %s
+
+define void @recv_double(double %v, double* %p) {
+; CHECK-LABEL: recv_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rcx, (%rdx)
+; CHECK-NEXT:    retq
+  store double %v, double* %p
+  ret void
+}
+
+define void @recv_float(float %v, float* %p) {
+; CHECK-LABEL: recv_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %ecx, (%rdx)
+; CHECK-NEXT:    retq
+  store float %v, float* %p
+  ret void
+}
+
+define dso_local double @ret_double(double* %p) {
+; CHECK-LABEL: ret_double:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    retq
+entry:
+  %v = load double, double* %p
+  ret double %v
+}
+
+define dso_local float @ret_float(float* %p) {
+; CHECK-LABEL: ret_float:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    retq
+entry:
+  %v = load float, float* %p
+  ret float %v
+}
+
+declare void @take_double(double)
+declare void @take_float(float)
+
+define void @pass_double(double* %p) {
+; CHECK-LABEL: pass_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .seh_stackalloc 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq (%rcx), %rcx
+; CHECK-NEXT:    callq take_double
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = load double, double* %p
+  call void @take_double(double %v)
+  ret void
+}
+
+define void @pass_float(float* %p) {
+; CHECK-LABEL: pass_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .seh_stackalloc 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movl (%rcx), %ecx
+; CHECK-NEXT:    callq take_float
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = load float, float* %p
+  call void @take_float(float %v)
+  ret void
+}
+
+declare double @produce_double()
+declare float @produce_float()
+
+define void @call_double(double* %p) {
+; CHECK-LABEL: call_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    .seh_pushreg %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    callq produce_double
+; CHECK-NEXT:    movq %rax, (%rsi)
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = call double @produce_double()
+  store double %v, double* %p
+  ret void
+}
+
+define void @call_float(float* %p) {
+; CHECK-LABEL: call_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    .seh_pushreg %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    callq produce_float
+; CHECK-NEXT:    movl %eax, (%rsi)
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = call float @produce_float()
+  store float %v, float* %p
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/no-sse-x86.ll b/llvm/test/CodeGen/X86/no-sse-x86.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-x86.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-unknown-linux-gnu"
+@f = external global float
+@d = external global double
+
+define void @test() nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    calll foo1
+; CHECK-NEXT:    fstps f
+; CHECK-NEXT:    fldl d
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll foo2
+; CHECK-NEXT:    fstpl d
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    calll foo3
+; CHECK-NEXT:    fstps f
+; CHECK-NEXT:    fldl d
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll foo4
+; CHECK-NEXT:    fstpl d
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+;
+; NOSSE-LABEL: test:
+; NOSSE:       # %bb.0: # %entry
+; NOSSE-NEXT:    subl $12, %esp
+; NOSSE-NEXT:    flds f
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll foo1
+; NOSSE-NEXT:    fstps f
+; NOSSE-NEXT:    fldl d
+; NOSSE-NEXT:    fstpl (%esp)
+; NOSSE-NEXT:    calll foo2
+; NOSSE-NEXT:    fstpl d
+; NOSSE-NEXT:    flds f
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll foo3
+; NOSSE-NEXT:    fstps f
+; NOSSE-NEXT:    fldl d
+; NOSSE-NEXT:    fstpl (%esp)
+; NOSSE-NEXT:    calll foo4
+; NOSSE-NEXT:    fstpl d
+; NOSSE-NEXT:    addl $12, %esp
+; NOSSE-NEXT:    retl
+entry:
+  %0 = load float, float* @f, align 4
+  %1 = tail call inreg float @foo1(float inreg %0) nounwind
+  store float %1, float* @f, align 4
+  %2 = load double, double* @d, align 8
+  %3 = tail call inreg double @foo2(double inreg %2) nounwind
+  store double %3, double* @d, align 8
+  %4 = load float, float* @f, align 4
+  %5 = tail call inreg float @foo3(float inreg %4) nounwind
+  store float %5, float* @f, align 4
+  %6 = load double, double* @d, align 8
+  %7 = tail call inreg double @foo4(double inreg %6) nounwind
+  store double %7, double* @d, align 8
+  ret void
+}
+
+declare inreg float @foo1(float inreg)
+
+declare inreg double @foo2(double inreg)
+
+declare inreg float @foo3(float inreg)
+
+declare inreg double @foo4(double inreg)
diff --git a/llvm/test/CodeGen/X86/nosse-error2.ll b/llvm/test/CodeGen/X86/nosse-error2.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/X86/nosse-error2.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: not llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
-; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
-
-; NOSSE: {{SSE register return with SSE disabled}}
-
-; CHECK: xmm
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-unknown-linux-gnu"
-@f = external global float             ; <float*> [#uses=4]
-@d = external global double            ; <double*> [#uses=4]
-
-define void @test() nounwind {
-entry:
-  %0 = load float, float* @f, align 4            ; <float> [#uses=1]
-  %1 = tail call inreg float @foo1(float inreg %0) nounwind            ; <float> [#uses=1]
-  store float %1, float* @f, align 4
-  %2 = load double, double* @d, align 8            ; <double> [#uses=1]
-  %3 = tail call inreg double @foo2(double inreg %2) nounwind            ; <double> [#uses=1]
-  store double %3, double* @d, align 8
-  %4 = load float, float* @f, align 4            ; <float> [#uses=1]
-  %5 = tail call inreg float @foo3(float inreg %4) nounwind            ; <float> [#uses=1]
-  store float %5, float* @f, align 4
-  %6 = load double, double* @d, align 8            ; <double> [#uses=1]
-  %7 = tail call inreg double @foo4(double inreg %6) nounwind            ; <double> [#uses=1]
-  store double %7, double* @d, align 8
-  ret void
-}
-
-declare inreg float @foo1(float inreg)
-
-declare inreg double @foo2(double inreg)
-
-declare inreg float @foo3(float inreg)
-
-declare inreg double @foo4(double inreg)
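
For reference, here is a minimal C sketch of the Win64 calling convention this
patch implements, mirroring the recv_double/ret_double IR tests above. The C
source and the compiler invocation are illustrative assumptions, not part of
the patch; the expected instructions come from the CHECK lines in
no-sse-win64.ll.

  /* Built (hypothetically) with:
   *   clang --target=x86_64-windows-gnu -mno-sse -O2 -S sketch.c
   * With SSE disabled on Win64, an f64 travels in GPRs like an i64:
   * arguments use RCX/RDX/R8/R9 and the return value uses RAX,
   * matching what GCC does under -mno-sse. */
  void recv_double(double v, double *p) {
    *p = v;      /* expected: movq %rcx, (%rdx) */
  }

  double ret_double(double *p) {
    return *p;   /* expected: movq (%rcx), %rax */
  }

  float ret_float(float *p) {
    return *p;   /* f32 is bitconverted to i32: movl (%rcx), %eax */
  }

Floats follow the same scheme one register width down (ECX/EDX/R8D/R9D for
arguments, EAX for returns), which is what the f32 CCBitConvertToType<i32>
rule in X86CallingConv.td encodes.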