Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -15382,10 +15382,11 @@ MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( + Subtarget->isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); - Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo( + SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } Index: test/CodeGen/X86/musttail-varargs.ll =================================================================== --- test/CodeGen/X86/musttail-varargs.ll +++ test/CodeGen/X86/musttail-varargs.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX +; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=LINUX-X32 ; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS ; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86 @@ -57,6 +58,40 @@ ; LINUX-DAG: movb {{.*}}, %al ; LINUX: jmpq *{{.*}} # TAILCALL +; LINUX-X32-LABEL: f_thunk: +; LINUX-X32-DAG: movl %edi, {{.*}} +; LINUX-X32-DAG: movq %rsi, {{.*}} +; LINUX-X32-DAG: movq %rdx, {{.*}} +; LINUX-X32-DAG: movq %rcx, {{.*}} +; LINUX-X32-DAG: movq %r8, {{.*}} +; LINUX-X32-DAG: movq %r9, {{.*}} +; LINUX-X32-DAG: movb %al, {{.*}} +; LINUX-X32-DAG: movaps %xmm0, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm1, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm2, {{[0-9]*}}(%esp) +; 
LINUX-X32-DAG: movaps %xmm3, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm4, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm5, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm6, {{[0-9]*}}(%esp) +; LINUX-X32-DAG: movaps %xmm7, {{[0-9]*}}(%esp) +; LINUX-X32: callq get_f +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm0 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm1 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm2 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm3 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm4 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm5 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm6 +; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm7 +; LINUX-X32-DAG: movl {{.*}}, %edi +; LINUX-X32-DAG: movq {{.*}}, %rsi +; LINUX-X32-DAG: movq {{.*}}, %rdx +; LINUX-X32-DAG: movq {{.*}}, %rcx +; LINUX-X32-DAG: movq {{.*}}, %r8 +; LINUX-X32-DAG: movq {{.*}}, %r9 +; LINUX-X32-DAG: movb {{.*}}, %al +; LINUX-X32: jmpq *{{.*}} # TAILCALL + ; WINDOWS-LABEL: f_thunk: ; WINDOWS-NOT: mov{{.}}ps ; WINDOWS-DAG: movq %rdx, {{.*}} @@ -92,6 +127,10 @@ ; LINUX-NOT: movq ; LINUX: jmpq *%rdi # TAILCALL +; LINUX-X32-LABEL: g_thunk: +; LINUX-X32-DAG: movl %edi, %[[REG:e[abcd]x|ebp|esi|edi|r8|r9|r1[0-5]]] +; LINUX-X32-DAG: jmpq *%[[REG]] # TAILCALL + ; WINDOWS-LABEL: g_thunk: ; WINDOWS-NOT: movq ; WINDOWS: jmpq *%rcx # TAILCALL @@ -130,6 +169,10 @@ ; LINUX: jne ; LINUX: jmpq *{{.*}} # TAILCALL ; LINUX: jmpq *{{.*}} # TAILCALL +; LINUX-X32-LABEL: h_thunk: +; LINUX-X32: jne +; LINUX-X32: jmpq *{{.*}} # TAILCALL +; LINUX-X32: jmpq *{{.*}} # TAILCALL ; WINDOWS-LABEL: h_thunk: ; WINDOWS: jne ; WINDOWS: jmpq *{{.*}} # TAILCALL Index: test/CodeGen/X86/soft-fp.ll =================================================================== --- test/CodeGen/X86/soft-fp.ll +++ test/CodeGen/X86/soft-fp.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-gnux32 
-mattr=+sse2,+soft-float | FileCheck %s ; CHECK-NOT: xmm{[0-9]+} Index: test/CodeGen/X86/stdarg.ll =================================================================== --- test/CodeGen/X86/stdarg.ll +++ test/CodeGen/X86/stdarg.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; CHECK: testb %al, %al +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK %struct.__va_list_tag = type { i32, i32, i8*, i8* } @@ -8,6 +7,15 @@ %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2] %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; [#uses=2] call void @llvm.va_start(i8* %ap12) +; CHECK: testb %al, %al + +; These test for specific offsets, which is very fragile. Still, the test needs +; to ensure that va_list is correctly handled by both x86-64 and x32. +; +; CHECK-DAG: movq {{.*}}, 192(%rsp) +; CHECK-DAG: movq {{.*}}, 184(%rsp) +; CHECK-DAG: movl {{.*}}, 180(%rsp) +; CHECK-DAG: movl {{.*}}, 176(%rsp) %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1] call void @bar(%struct.__va_list_tag* %ap3) nounwind call void @llvm.va_end(i8* %ap12) Index: test/CodeGen/X86/x32-va_start.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/x32-va_start.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=-sse | FileCheck %s -check-prefix=CHECK -check-prefix=NOSSE +; +; Verifies that x32 va_start lowering is sane. To regenerate this test, use +; cat <<EOF | +; +; int foo(float a, const char* fmt, ...)
{ +; va_list ap; +; va_start(ap, fmt); +; int value = va_arg(ap, int); +; va_end(ap); +; return value; +; } +; EOF +; build/bin/clang -mx32 -O3 -o- -S -emit-llvm -xc - +; +target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnux32" + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +define i32 @foo(float %a, i8* nocapture readnone %fmt, ...) nounwind { +entry: + %ap = alloca [1 x %struct.__va_list_tag], align 16 + %0 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* + call void @llvm.lifetime.start(i64 16, i8* %0) #2 + call void @llvm.va_start(i8* %0) +; SSE: subl $72, %esp +; SSE: testb %al, %al +; SSE: je .[[NOFP:.*]] +; SSE-DAG: movaps %xmm1 +; SSE-DAG: movaps %xmm2 +; SSE-DAG: movaps %xmm3 +; SSE-DAG: movaps %xmm4 +; SSE-DAG: movaps %xmm5 +; SSE-DAG: movaps %xmm6 +; SSE-DAG: movaps %xmm7 +; NOSSE-NOT: xmm +; SSE: .[[NOFP]]: +; CHECK-DAG: movq %r9 +; CHECK-DAG: movq %r8 +; CHECK-DAG: movq %rcx +; CHECK-DAG: movq %rdx +; CHECK-DAG: movq %rsi + %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0 + %gp_offset = load i32, i32* %gp_offset_p, align 16 + %fits_in_gp = icmp ult i32 %gp_offset, 41 + br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem +; CHECK: cmpl $40, [[COUNT:.*]] +; CHECK: ja .[[IN_MEM:.*]] + +vaarg.in_reg: ; preds = %entry + %1 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 3 + %reg_save_area = load i8*, i8** %1, align 4 + %2 = getelementptr i8, i8* %reg_save_area, i32 %gp_offset + %3 = add i32 %gp_offset, 8 + store i32 %3, i32* %gp_offset_p, align 16 + br label %vaarg.end +; CHECK: movl {{[^,]*}}, [[ADDR:.*]] +; CHECK: addl [[COUNT]], [[ADDR]] +; SSE: jmp .[[END:.*]] +; NOSSE: movl ([[ADDR]]), %eax +; NOSSE: retq +; CHECK: .[[IN_MEM]]: +vaarg.in_mem: ; preds = %entry + %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x 
%struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2 + %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8 + %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i32 8 + store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8 + br label %vaarg.end +; CHECK: movl {{[^,]*}}, [[ADDR]] +; NOSSE: movl ([[ADDR]]), %eax +; NOSSE: retq +; SSE: .[[END]]: + +vaarg.end: ; preds = %vaarg.in_mem, %vaarg.in_reg + %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area, %vaarg.in_mem ] + %vaarg.addr = bitcast i8* %vaarg.addr.in to i32* + %4 = load i32, i32* %vaarg.addr, align 4 + call void @llvm.va_end(i8* %0) + call void @llvm.lifetime.end(i64 16, i8* %0) #2 + ret i32 %4 +; SSE: movl ([[ADDR]]), %eax +; SSE: retq +} + +; Function Attrs: nounwind argmemonly +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) nounwind + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) nounwind + +; Function Attrs: nounwind argmemonly +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind +