diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -409,9 +409,6 @@ CmdArgs.push_back("--return-at-end"); } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // Map the -O we received to -O{0,1,2,3}. - // - // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's - // default, so it may correspond more closely to the spirit of clang -O2. // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. @@ -433,9 +430,9 @@ } CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); } else { - // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond - // to no optimizations, but ptxas's default is -O3. - CmdArgs.push_back("-O0"); + // If no -O was passed, pass -O3 to ptxas -- this makes ptxas's + // optimization level the same as the ptxjitcompiler. + CmdArgs.push_back("-O3"); } if (DIKind == DebugDirectivesOnly) CmdArgs.push_back("-lineinfo"); diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu --- a/clang/test/Driver/cuda-external-tools.cu +++ b/clang/test/Driver/cuda-external-tools.cu @@ -40,10 +40,10 @@ // RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s -// Regular compile without -O. This should result in us passing -O0 to ptxas. +// Regular compile without -O. This should result in us passing -O3 to ptxas. // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ // RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // Regular compiles with -Os and -Oz. For lack of a better option, we map // these to ptxas -O3. @@ -75,7 +75,7 @@ // Compile with -fintegrated-as. This should still cause us to invoke ptxas. // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \ // RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ -// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s +// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s // Check that we still pass -c when generating relocatable device code. // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \ // RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \