Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1294,6 +1294,8 @@ HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">; def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, Group, Flags<[NoArgumentUnused]>; +def fopenmp_target_arch_EQ : Joined<["-"], "fopenmp-target-arch=">, Flags<[DriverOption]>, + HelpText<"Pass a single target architecture (default for NVIDIA is sm_20) to be used by OpenMP device offloading.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">; Index: lib/Driver/ToolChains/Cuda.cpp =================================================================== --- lib/Driver/ToolChains/Cuda.cpp +++ lib/Driver/ToolChains/Cuda.cpp @@ -212,8 +212,20 @@ static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); + StringRef GPUArchName; + std::vector GPUArchNames; + // If this is an OpenMP action we need to extract the device architecture from + // the -fopenmp-target-arch option. + if (JA.isDeviceOffloading(Action::OFK_OpenMP)) { + GPUArchNames = Args.getAllArgValues(options::OPT_fopenmp_target_arch_EQ); + assert(GPUArchNames.size() == 1 && + "Exactly one GPU Arch required for ptxas."); + GPUArchName = GPUArchNames[0]; + } else + GPUArchName = JA.getOffloadingArch(); + // Obtain architecture from the action. 
- CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch()); + CudaArch gpu_arch = StringToCudaArch(GPUArchName); assert(gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an architecture."); @@ -342,7 +354,9 @@ Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + StringRef GpuArch = DriverArgs.getLastArgValue( + DeviceOffloadingKind == Action::OFK_OpenMP ? + options::OPT_fopenmp_target_arch_EQ : options::OPT_march_EQ); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); assert((DeviceOffloadingKind == Action::OFK_OpenMP || DeviceOffloadingKind == Action::OFK_Cuda) && @@ -405,7 +419,7 @@ // For OpenMP device offloading, append derived arguments. Make sure // flags are not duplicated. - // TODO: Append the compute capability. + // Also append the compute capability. if (DeviceOffloadKind == Action::OFK_OpenMP) { for (Arg *A : Args){ bool IsDuplicate = false; @@ -418,6 +432,14 @@ if (!IsDuplicate) DAL->append(A); } + + // Get the compute capability from the -fopenmp-target-arch flag. + // The default compute capability is sm_20 since this is a CUDA + // tool chain. 
+ if (Args.getAllArgValues(options::OPT_fopenmp_target_arch_EQ).empty()) + DAL->AddJoinedArg(nullptr, + Opts.getOption(options::OPT_fopenmp_target_arch_EQ), "sm_20"); + return DAL; } Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -599,3 +599,11 @@ // CHK-FOPENMP-IS-DEVICE: clang{{.*}}.i" {{.*}}" "-fopenmp-is-device" // CHK-FOPENMP-IS-DEVICE-NEXT: clang{{.*}}.bc" {{.*}}.i" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" // CHK-FOPENMP-IS-DEVICE-NEXT: clang{{.*}}.s" {{.*}}.bc" "-fopenmp-is-device" + +/// ########################################################################### + +/// Check -fopenmp-target-arch is reported as unused when the only offloading target is powerpc64le. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -save-temps -no-canonical-prefixes -fopenmp-target-arch=sm_35 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMPUTE-CAPABILITY %s + +// CHK-COMPUTE-CAPABILITY: clang: warning: argument unused during compilation: '-fopenmp-target-arch=sm_35'