diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -452,6 +452,9 @@ ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2, SwiftAsyncFramePointerKind::Always) +/// Whether to skip RAX setup when passing variable arguments (x86 only). +CODEGENOPT(SkipRaxSetup, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3189,6 +3189,10 @@ HelpText<"Disable function outlining (AArch64 only)">; def mno_ms_bitfields : Flag<["-"], "mno-ms-bitfields">, Group<m_Group>, HelpText<"Do not set the default structure layout to be compatible with the Microsoft compiler standard">; +def mskip_rax_setup : Flag<["-"], "mskip-rax-setup">, Group<m_Group>, Flags<[CC1Option]>, + HelpText<"Skip setting up RAX register when passing variable arguments (x86 only)">, + MarshallingInfoFlag<CodeGenOpts<"SkipRaxSetup">>; +def mno_skip_rax_setup : Flag<["-"], "mno-skip-rax-setup">, Group<m_Group>, Flags<[CC1Option]>; def mstackrealign : Flag<["-"], "mstackrealign">, Group<m_Group>, Flags<[CC1Option]>, HelpText<"Force realign the stack at entry to every function">, MarshallingInfoFlag<CodeGenOpts<"StackRealignment">>; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -813,6 +813,8 @@ getCodeGenOpts().StackProtectorGuardOffset); if (getCodeGenOpts().StackAlignment) getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); + if (getCodeGenOpts().SkipRaxSetup) + getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- 
a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2194,6 +2194,11 @@ CmdArgs.push_back("-x86-asm-syntax=intel"); } + if (Arg *A = Args.getLastArg(options::OPT_mskip_rax_setup, + options::OPT_mno_skip_rax_setup)) + if (A->getOption().matches(options::OPT_mskip_rax_setup)) + CmdArgs.push_back(Args.MakeArgString("-mskip-rax-setup")); + // Set flags to support MCU ABI. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { CmdArgs.push_back("-mfloat-abi"); diff --git a/clang/test/CodeGen/pr23258.c b/clang/test/CodeGen/pr23258.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/pr23258.c @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=NO-SKIP +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -mskip-rax-setup -emit-llvm %s -o - | FileCheck %s -check-prefix=SKIP + +void f() {} + +// SKIP: !"SkipRaxSetup", i32 1} +// NO-SKIP-NOT: "SkipRaxSetup" diff --git a/clang/test/Driver/x86_features.c b/clang/test/Driver/x86_features.c --- a/clang/test/Driver/x86_features.c +++ b/clang/test/Driver/x86_features.c @@ -5,3 +5,9 @@ // Test that we don't produce an error with -mieee-fp. 
// RUN: %clang -### %s -mieee-fp -S 2>&1 | FileCheck --check-prefix=IEEE %s // IEEE-NOT: error: unknown argument + +// RUN: %clang -### %s -mskip-rax-setup -S 2>&1 | FileCheck --check-prefix=SRS %s +// SRS: "-mskip-rax-setup" + +// RUN: %clang -### %s -mno-skip-rax-setup -S 2>&1 | FileCheck --check-prefix=NO-SRS %s +// NO-SRS-NOT: "-mskip-rax-setup" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4414,7 +4414,8 @@ } } - if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) { + if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail && + (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in diff --git a/llvm/test/CodeGen/X86/pr23258.ll b/llvm/test/CodeGen/X86/pr23258.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr23258.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=HAS-RAX +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse | FileCheck %s --check-prefix=HAS-RAX +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefix=NO-RAX + +define void @foo() { +; HAS-RAX-LABEL: foo: +; HAS-RAX: # %bb.0: +; HAS-RAX-NEXT: movl $1, %edi +; HAS-RAX-NEXT: xorl %eax, %eax +; HAS-RAX-NEXT: jmp bar@PLT # TAILCALL +; +; NO-RAX-LABEL: foo: +; NO-RAX: # %bb.0: +; NO-RAX-NEXT: movl $1, %edi +; NO-RAX-NEXT: jmp bar@PLT # TAILCALL + tail call void (i32, ...) @bar(i32 1) + ret void +} + +define void @bar(i32, ...) 
nounwind { +; HAS-RAX-LABEL: bar: +; HAS-RAX: # %bb.0: +; HAS-RAX-NEXT: subq $56, %rsp +; HAS-RAX-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: testb %al, %al +; HAS-RAX-NEXT: je .LBB1_2 +; HAS-RAX-NEXT: # %bb.1: +; HAS-RAX-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm5, (%rsp) +; HAS-RAX-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) +; HAS-RAX-NEXT: .LBB1_2: +; HAS-RAX-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; HAS-RAX-NEXT: movq %rax, 8 +; HAS-RAX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; HAS-RAX-NEXT: movq %rax, 16 +; HAS-RAX-NEXT: movl $8, 0 +; HAS-RAX-NEXT: movl $48, 4 +; HAS-RAX-NEXT: addq $56, %rsp +; HAS-RAX-NEXT: retq +; +; NO-RAX-LABEL: bar: +; NO-RAX: # %bb.0: +; NO-RAX-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; NO-RAX-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; NO-RAX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; NO-RAX-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; NO-RAX-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; NO-RAX-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; NO-RAX-NEXT: movq %rax, 8 +; NO-RAX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax +; NO-RAX-NEXT: movq %rax, 16 +; NO-RAX-NEXT: movl $8, 0 +; NO-RAX-NEXT: movl $48, 4 +; NO-RAX-NEXT: retq + call void @llvm.va_start(i8* null) + ret void +} + +declare void @llvm.va_start(i8*) + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"SkipRaxSetup", i32 1}