Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -449,6 +449,10 @@
   HelpText<"Pass <arg> to fatbinary invocation">, MetaVarName<"<arg>">;
 def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">,
   HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">;
+def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
+  HelpText<"Pass arguments to target offloading toolchain.">;
+def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
+  HelpText<"Pass arguments to target offloading toolchain. First entry is a triple that identifies the toolchain.">;
 def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
   HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
   Group<Link_Group>;
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -212,8 +212,20 @@
       static_cast<const toolchains::CudaToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
+  StringRef GPUArchName;
+  std::vector<std::string> GPUArchNames;
+  // For an OpenMP offloading action, take the device architecture from the
+  // -march= value in Args; GPUArchNames owns the string GPUArchName refers to.
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+    GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ);
+    assert(GPUArchNames.size() == 1 &&
+           "Exactly one GPU Arch required for ptxas.");
+    GPUArchName = GPUArchNames[0];
+  } else
+    GPUArchName = JA.getOffloadingArch();
+
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
   assert(gpu_arch != CudaArch::UNKNOWN &&
          "Device action expected to have an architecture.");
 
@@ -392,6 +404,18 @@
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
 
+/// Append a -march= argument to \p DAL when \p Opt has the form
+/// "-march=<value>"; any other option string is ignored. \p Opts supplies
+/// the Option object for OPT_march_EQ.
+static void AddMArchOption(DerivedArgList *DAL,
+                           const OptTable &Opts,
+                           StringRef Opt) {
+  if (Opt.startswith("-march="))
+    DAL->AddJoinedArg(nullptr,
+                      Opts.getOption(options::OPT_march_EQ),
+                      Opt.split("=").second);
+}
+
 llvm::opt::DerivedArgList *
 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                              StringRef BoundArch,
@@ -405,7 +429,7 @@
 
   // For OpenMP device offloading, append derived arguments. Make sure
   // flags are not duplicated.
-  // TODO: Append the compute capability.
+  // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
     for (Arg *A : Args){
       bool IsDuplicate = false;
@@ -418,6 +442,47 @@
       if (!IsDuplicate)
         DAL->append(A);
     }
+
+    // Get the compute capability from the -Xopenmp-target flags.
+    // The default compute capability is sm_20 since this is a CUDA
+    // tool chain.
+    auto OptList = Args.getAllArgValues(options::OPT_Xopenmp_target_EQ);
+
+    // For each OPT_Xopenmp_target_EQ option, the function returns
+    // two strings, the triple and the option.
+    // The following format is assumed:
+    //
+    // -Xopenmp-target=nvptx64-nvidia-cuda -opt=val
+    for (unsigned i = 0; i + 1 < OptList.size(); i += 2) {
+      StringRef Opt = OptList[i + 1];
+      if (OptList[i] == getTripleString())
+        AddMArchOption(DAL, Opts, Opt);
+    }
+
+    OptList = Args.getAllArgValues(options::OPT_Xopenmp_target);
+    // When there is only one option in the list, the following format
+    // is assumed:
+    //
+    // -Xopenmp-target -opt=val
+
+    // By default, if no triple is explicitly specified, we
+    // associate -opt=val with the toolchain specified under the
+    // -fopenmp-targets flag (provided that there is only one such
+    // toolchain specified).
+    assert((OptList.empty() ||
+            Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() == 1) &&
+           "Target toolchain not specified on -Xopenmp-target and cannot be deduced.");
+
+    // Forward any -march= that was given without an explicit triple.
+    for (StringRef Opt : OptList)
+      AddMArchOption(DAL, Opts, Opt);
+
+    auto MArchList = DAL->getAllArgValues(options::OPT_march_EQ);
+    assert(MArchList.size() < 2 && "At most one GPU arch allowed.");
+    if (MArchList.empty())
+      DAL->AddJoinedArg(nullptr,
+          Opts.getOption(options::OPT_march_EQ), "sm_20");
+
     return DAL;
   }
 
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -597,3 +597,19 @@
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s
 
 // CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le-unknown-linux-gnu" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
+
+/// ###########################################################################
+
+/// Check that -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 is accepted and reported as unused for a non-NVPTX offload target.
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s
+
+// CHK-FOPENMP-EQ-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8'
+
+/// ###########################################################################
+
+/// Check that -Xopenmp-target -march=pwr8 (no triple) is accepted and reported as unused for a non-NVPTX offload target.
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -march=pwr8 %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s
+
+// CHK-FOPENMP-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target -march=pwr8'