Index: cfe/trunk/include/clang/Driver/Driver.h
===================================================================
--- cfe/trunk/include/clang/Driver/Driver.h
+++ cfe/trunk/include/clang/Driver/Driver.h
@@ -456,8 +456,12 @@
-  /// ConstructAction - Construct the appropriate action to do for
-  /// \p Phase on the \p Input, taking in to account arguments
-  /// like -fsyntax-only or --analyze.
-  Action *ConstructPhaseAction(Compilation &C, const llvm::opt::ArgList &Args,
-                               phases::ID Phase, Action *Input) const;
+  /// ConstructPhaseAction - Construct the appropriate action to do for
+  /// \p Phase on the \p Input, taking into account arguments
+  /// like -fsyntax-only or --analyze. \p TargetDeviceOffloadKind is the
+  /// offload kind of the device the action is built for; the LTO backend
+  /// output types are only used when it is Action::OFK_None.
+  Action *ConstructPhaseAction(
+      Compilation &C, const llvm::opt::ArgList &Args, phases::ID Phase,
+      Action *Input,
+      Action::OffloadKind TargetDeviceOffloadKind = Action::OFK_None) const;
 
   /// BuildJobsForAction - Construct the jobs to perform for the action \p A and
   /// return an InputInfo for the result of running \p A. Will only construct
Index: cfe/trunk/lib/Driver/Driver.cpp
===================================================================
--- cfe/trunk/lib/Driver/Driver.cpp
+++ cfe/trunk/lib/Driver/Driver.cpp
@@ -2171,7 +2171,7 @@
             break;
 
           CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
-              C, Args, Ph, CudaDeviceActions[I]);
+              C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
 
           if (Ph == phases::Assemble)
             break;
@@ -3011,8 +3011,9 @@
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
-Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
-                                     phases::ID Phase, Action *Input) const {
+Action *Driver::ConstructPhaseAction(
+    Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
+    Action::OffloadKind TargetDeviceOffloadKind) const {
   llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
 
   // Some types skip the assembler phase (e.g., llvm-bc), but we can't
@@ -3074,7 +3075,8 @@
     return C.MakeAction(Input, types::TY_LLVM_BC);
   }
   case phases::Backend: {
-    if (isUsingLTO()) {
+    // LTO output is only produced when no device offload kind is targeted.
+    if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
       types::ID Output =
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return C.MakeAction(Input, Output);
Index: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp
@@ -3249,7 +3249,11 @@
   if (JA.getType() == types::TY_LLVM_BC)
     CmdArgs.push_back("-emit-llvm-uselists");
 
-  if (D.isUsingLTO()) {
+  // Device-side jobs do not support LTO.
+  bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
+                                 JA.isDeviceOffloading(Action::OFK_Host));
+
+  if (D.isUsingLTO() && !IsDeviceOffloadAction) {
     Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
 
     // The Darwin and PS4 linkers currently use the legacy LTO API, which
Index: cfe/trunk/test/Driver/lto.cu
===================================================================
--- cfe/trunk/test/Driver/lto.cu
+++ cfe/trunk/test/Driver/lto.cu
@@ -0,0 +1,76 @@
+// -flto causes a switch to llvm-bc object files.
+// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases -c %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-ACTIONS < %t %s
+//
+// CHECK-COMPILE-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILE-ACTIONS-NOT: lto-bc
+// CHECK-COMPILE-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+
+// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-ACTIONS < %t %s
+//
+// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}lto.cu", cuda, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
+// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
+// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 13: linker, {12}, image, (host-cuda)
+
+// llvm-bc and llvm-ll outputs need to match regular suffixes
+// (unfortunately).
+// RUN: %clangxx %s -nocudainc -nocudalib -flto -save-temps -### 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
+//
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[BC:.*lto-host.*\.bc]]" {{.*}}[[CPP]]"
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[OBJ:.*lto-host.*\.o]]" {{.*}}[[BC]]"
+// CHECK-COMPILELINK-SUFFIXES: "{{.*}}a.{{(out|exe)}}" {{.*}}[[OBJ]]"
+
+// RUN: %clangxx %s -nocudainc -nocudalib -flto -S -### 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-SUFFIXES < %t %s
+//
+// CHECK-COMPILE-SUFFIXES: "-o" "{{.*}}lto.s" "-x" "cuda" "{{.*}}lto.cu"
+
+// RUN: not %clangxx -nocudainc -nocudalib %s -emit-llvm 2>&1 \
+// RUN:   | FileCheck --check-prefix=LLVM-LINK %s
+// LLVM-LINK: -emit-llvm cannot be used when linking
+
+// -flto should cause link using gold plugin
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-LTO-ACTION < %t %s
+//
+// CHECK-LINK-LTO-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// -flto=full should cause link using gold plugin
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto=full 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-FULL-ACTION < %t %s
+//
+// CHECK-LINK-FULL-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// Check that subsequent -fno-lto takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto=full -fno-lto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-NOLTO-ACTION < %t %s
+//
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// -flto passes along an explicit debugger tuning argument.
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto -glldb 2> %t
+// RUN: FileCheck -check-prefix=CHECK-TUNING-LLDB < %t %s
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto -g 2> %t
+// RUN: FileCheck -check-prefix=CHECK-NO-TUNING < %t %s
+//
+// CHECK-TUNING-LLDB: "-plugin-opt=-debugger-tune=lldb"
+// CHECK-NO-TUNING-NOT: "-plugin-opt=-debugger-tune
Index: cfe/trunk/test/Driver/thinlto.cu
===================================================================
--- cfe/trunk/test/Driver/thinlto.cu
+++ cfe/trunk/test/Driver/thinlto.cu
@@ -0,0 +1,50 @@
+// -flto=thin causes a switch to llvm-bc object files.
+// RUN: %clangxx -ccc-print-phases -nocudainc -nocudalib -c %s -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-ACTIONS < %t %s
+//
+// CHECK-COMPILE-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILE-ACTIONS-NOT: lto-bc
+// CHECK-COMPILE-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+
+// RUN: %clangxx -ccc-print-phases -nocudainc -nocudalib %s -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-ACTIONS < %t %s
+//
+// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}thinlto.cu", cuda, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
+// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
+// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 13: linker, {12}, image, (host-cuda)
+
+// -flto=thin should cause link using gold plugin with thinlto option,
+// also confirm that it takes precedence over earlier -fno-lto and -flto=full.
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto=full -fno-lto -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-THIN-ACTION < %t %s
+//
+// CHECK-LINK-THIN-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-THIN-ACTION: "-plugin-opt=thinlto"
+
+// Check that subsequent -flto=full takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto=thin -flto=full 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-FULL-ACTION < %t %s
+//
+// CHECK-LINK-FULL-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-FULL-ACTION-NOT: "-plugin-opt=thinlto"
+
+// Check that subsequent -fno-lto takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN:   -target x86_64-unknown-linux -### %s -flto=thin -fno-lto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-NOLTO-ACTION < %t %s
+//
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin-opt=thinlto"