Index: llvm/trunk/lib/Target/X86/X86CallingConv.td =================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.td +++ llvm/trunk/lib/Target/X86/X86CallingConv.td @@ -18,6 +18,12 @@ "(State.getMachineFunction().getSubtarget()).", F), A>; +/// CCIfNotSubtarget - Match if the current subtarget doesn't have a feature F. +class CCIfNotSubtarget<string F, CCAction A> + : CCIf<!strconcat("!static_cast<const X86Subtarget&>" + "(State.getMachineFunction().getSubtarget()).", F), + A>; + // Register classes for RegCall class RC_X86_RegCall { list<Register> GPR_8 = []; @@ -246,8 +252,9 @@ // MM0, it doesn't support these vector types. CCIfType<[x86mmx], CCAssignToReg<[MM0]>>, - // Long double types are always returned in FP0 (even with SSE). - CCIfType<[f80], CCAssignToReg<[FP0, FP1]>> + // Long double types are always returned in FP0 (even with SSE), + // except on Win64. + CCIfNotSubtarget<"isTargetWin64()", CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>> ]>; // X86-32 C return-value convention. @@ -605,6 +612,9 @@ // 512 bit vectors are passed by pointer CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>, + // Long doubles are passed by pointer + CCIfType<[f80], CCPassIndirect<i64>>, + // The first 4 MMX vector arguments are passed in GPRs. CCIfType<[x86mmx], CCBitConvertToType<i64>>, @@ -628,11 +638,7 @@ // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Long doubles get stack slots whose size and alignment depends on the - // subtarget. 
- CCIfType<[f80], CCAssignToStack<0, 0>> + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; def CC_X86_Win64_VectorCall : CallingConv<[ Index: llvm/trunk/test/CodeGen/X86/finite-libcalls.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/finite-libcalls.ll +++ llvm/trunk/test/CodeGen/X86/finite-libcalls.ll @@ -50,11 +50,19 @@ ; ; WIN-LABEL: exp_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq expl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: exp_f80: @@ -113,11 +121,19 @@ ; ; WIN-LABEL: exp2_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq exp2l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: exp2_f80: @@ -176,11 +192,19 @@ ; ; WIN-LABEL: log_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq logl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; 
WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: log_f80: @@ -239,11 +263,19 @@ ; ; WIN-LABEL: log2_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq log2l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: log2_f80: @@ -302,11 +334,19 @@ ; ; WIN-LABEL: log10_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq log10l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: log10_f80: @@ -373,13 +413,22 @@ ; ; WIN-LABEL: pow_f80: ; WIN: # %bb.0: -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: fldt (%rdx) ; WIN-NEXT: fld %st(0) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) ; WIN-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq powl -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; WIN-NEXT: fstpt (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; MAC-LABEL: pow_f80: Index: 
llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll +++ llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -918,7 +918,7 @@ ; AVX512_64_WIN-LABEL: x_to_u64: ; AVX512_64_WIN: # %bb.0: ; AVX512_64_WIN-NEXT: pushq %rax -; AVX512_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512_64_WIN-NEXT: fldt (%rcx) ; AVX512_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; AVX512_64_WIN-NEXT: fld %st(1) ; AVX512_64_WIN-NEXT: fsub %st(1) @@ -1003,25 +1003,25 @@ ; ; SSE3_64_WIN-LABEL: x_to_u64: ; SSE3_64_WIN: # %bb.0: -; SSE3_64_WIN-NEXT: subq $24, %rsp -; SSE3_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE3_64_WIN-NEXT: subq $16, %rsp +; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE3_64_WIN-NEXT: fld %st(1) ; SSE3_64_WIN-NEXT: fsub %st(1) ; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp) ; SSE3_64_WIN-NEXT: fld %st(1) -; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp) +; SSE3_64_WIN-NEXT: fisttpll (%rsp) ; SSE3_64_WIN-NEXT: fucompi %st(1) ; SSE3_64_WIN-NEXT: fstp %st(0) ; SSE3_64_WIN-NEXT: jbe .LBB4_1 ; SSE3_64_WIN-NEXT: # %bb.2: -; SSE3_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax -; SSE3_64_WIN-NEXT: addq $24, %rsp +; SSE3_64_WIN-NEXT: movq (%rsp), %rax +; SSE3_64_WIN-NEXT: addq $16, %rsp ; SSE3_64_WIN-NEXT: retq ; SSE3_64_WIN-NEXT: .LBB4_1: ; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; SSE3_64_WIN-NEXT: addq $24, %rsp +; SSE3_64_WIN-NEXT: addq $16, %rsp ; SSE3_64_WIN-NEXT: retq ; ; SSE3_64_LIN-LABEL: x_to_u64: @@ -1103,7 +1103,7 @@ ; SSE2_64_WIN-LABEL: x_to_u64: ; SSE2_64_WIN: # %bb.0: ; SSE2_64_WIN-NEXT: subq $24, %rsp -; SSE2_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE2_64_WIN-NEXT: fld %st(1) ; SSE2_64_WIN-NEXT: fsub %st(1) @@ -1268,7 +1268,7 @@ ; AVX512_64_WIN-LABEL: x_to_s64: ; AVX512_64_WIN: # 
%bb.0: ; AVX512_64_WIN-NEXT: pushq %rax -; AVX512_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512_64_WIN-NEXT: fldt (%rcx) ; AVX512_64_WIN-NEXT: fisttpll (%rsp) ; AVX512_64_WIN-NEXT: movq (%rsp), %rax ; AVX512_64_WIN-NEXT: popq %rcx @@ -1308,7 +1308,7 @@ ; SSE3_64_WIN-LABEL: x_to_s64: ; SSE3_64_WIN: # %bb.0: ; SSE3_64_WIN-NEXT: pushq %rax -; SSE3_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) ; SSE3_64_WIN-NEXT: movq (%rsp), %rax ; SSE3_64_WIN-NEXT: popq %rcx @@ -1359,8 +1359,8 @@ ; ; SSE2_64_WIN-LABEL: x_to_s64: ; SSE2_64_WIN: # %bb.0: -; SSE2_64_WIN-NEXT: subq $24, %rsp -; SSE2_64_WIN-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2_64_WIN-NEXT: subq $16, %rsp +; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F @@ -1369,7 +1369,7 @@ ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax -; SSE2_64_WIN-NEXT: addq $24, %rsp +; SSE2_64_WIN-NEXT: addq $16, %rsp ; SSE2_64_WIN-NEXT: retq ; ; SSE2_64_LIN-LABEL: x_to_s64: Index: llvm/trunk/test/CodeGen/X86/win64-long-double.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/win64-long-double.ll +++ llvm/trunk/test/CodeGen/X86/win64-long-double.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple x86_64-w64-mingw32 %s -o - | FileCheck %s + +@glob = common dso_local local_unnamed_addr global x86_fp80 0xK00000000000000000000, align 16 + +define dso_local void @call() { +entry: + %0 = load x86_fp80, x86_fp80* @glob, align 16 + %1 = tail call x86_fp80 @floorl(x86_fp80 %0) + store x86_fp80 %1, x86_fp80* @glob, align 16 + ret void +} + +declare x86_fp80 @floorl(x86_fp80) + +; CHECK-LABEL: call +; CHECK: fldt glob(%rip) +; CHECK: fstpt [[ARGOFF:[0-9]+]](%rsp) +; CHECK: leaq [[RETOFF:[0-9]+]](%rsp), %rcx +; CHECK: leaq 
[[ARGOFF]](%rsp), %rdx +; CHECK: callq floorl +; CHECK: fldt [[RETOFF]](%rsp) +; CHECK: fstpt glob(%rip)