Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -10695,15 +10695,25 @@ assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas."); const std::string& gpu_arch = gpu_archs[0]; - ArgStringList CmdArgs; CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); + bool WantDebug = false; + Args.ClaimAllArgs(options::OPT_g_Group); + if (Arg *A = Args.getLastArg(options::OPT_g_Group)) + WantDebug = !A->getOption().matches(options::OPT_g0) && + !A->getOption().matches(options::OPT_ggdb0); + if (WantDebug) { + // ptxas does not accept -g option if optimization is enabled, so we ignore + // compiler's -O* options if we want debug info. + CmdArgs.push_back("-g"); + CmdArgs.push_back("--dont-merge-basicblocks"); + CmdArgs.push_back("--return-at-end"); + } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + // Map the -O we received to -O{0,1,2,3}. + // + // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's + // default, so it may correspond more closely to the spirit of clang -O2. - // Map the -O we received to -O{0,1,2,3}. - // - // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default, - // so it may correspond more closely to the spirit of clang -O2. - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. StringRef OOpt = "3"; @@ -10729,9 +10739,6 @@ CmdArgs.push_back("-O0"); } - // Don't bother passing -g to ptxas: It's enabled by default at -O0, and - // not supported at other optimization levels. 
- CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(gpu_arch)); CmdArgs.push_back("--output-file"); Index: test/Driver/cuda-external-tools.cu =================================================================== --- test/Driver/cuda-external-tools.cu +++ test/Driver/cuda-external-tools.cu @@ -18,6 +18,14 @@ // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s +// With debugging enabled, ptxas should be run with no ptxas optimizations. +// RUN: %clang -### -target x86_64-linux-gnu -g -O2 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s + +// Except when -g0 is passed, which should re-enable ptxas optimizations. +// RUN: %clang -### -target x86_64-linux-gnu -g0 -O2 -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s + // Regular compile without -O. This should result in us passing -O0 to ptxas. // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s @@ -59,9 +67,14 @@ // ARCH64: "-m64" // ARCH32: "-m32" // OPT0: "-O0" +// OPT0-NOT: "-g" // OPT1: "-O1" +// OPT1-NOT: "-g" // OPT2: "-O2" +// OPT2-NOT: "-g" // OPT3: "-O3" +// OPT3-NOT: "-g" +// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end" // SM20: "--gpu-name" "sm_20" // SM35: "--gpu-name" "sm_35" // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"