Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -378,6 +378,8 @@ Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">; def cuda_host_only : Flag<["--"], "cuda-host-only">, HelpText<"Do host-side CUDA compilation only">; +def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">, + HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">; def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group, HelpText<"CUDA installation path">; def dA : Flag<["-"], "dA">, Group; Index: cfe/trunk/lib/Driver/Tools.cpp =================================================================== --- cfe/trunk/lib/Driver/Tools.cpp +++ cfe/trunk/lib/Driver/Tools.cpp @@ -10691,15 +10691,20 @@ assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas."); const std::string& gpu_arch = gpu_archs[0]; - ArgStringList CmdArgs; CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); + if (Args.getLastArg(options::OPT_cuda_noopt_device_debug)) { + // ptxas does not accept -g option if optimization is enabled, so + // we ignore the compiler's -O* options if we want debug info. + CmdArgs.push_back("-g"); + CmdArgs.push_back("--dont-merge-basicblocks"); + CmdArgs.push_back("--return-at-end"); + } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + // Map the -O we received to -O{0,1,2,3}. + // + // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's + // default, so it may correspond more closely to the spirit of clang -O2. - // Map the -O we received to -O{0,1,2,3}. - // - // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default, - // so it may correspond more closely to the spirit of clang -O2. 
- if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. StringRef OOpt = "3"; @@ -10725,9 +10730,6 @@ CmdArgs.push_back("-O0"); } - // Don't bother passing -g to ptxas: It's enabled by default at -O0, and - // not supported at other optimization levels. - CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(gpu_arch)); CmdArgs.push_back("--output-file"); Index: cfe/trunk/test/Driver/cuda-external-tools.cu =================================================================== --- cfe/trunk/test/Driver/cuda-external-tools.cu +++ cfe/trunk/test/Driver/cuda-external-tools.cu @@ -18,6 +18,10 @@ // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s +// With debugging enabled, ptxas should be run with no ptxas optimizations. +// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s + // Regular compile without -O. This should result in us passing -O0 to ptxas. // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s @@ -59,9 +63,14 @@ // ARCH64: "-m64" // ARCH32: "-m32" // OPT0: "-O0" +// OPT0-NOT: "-g" // OPT1: "-O1" +// OPT1-NOT: "-g" // OPT2: "-O2" +// OPT2-NOT: "-g" // OPT3: "-O3" +// OPT3-NOT: "-g" +// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end" // SM20: "--gpu-name" "sm_20" // SM35: "--gpu-name" "sm_35" // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"