Index: include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- include/clang/Basic/DiagnosticDriverKinds.td +++ include/clang/Basic/DiagnosticDriverKinds.td @@ -69,6 +69,10 @@ "invalid Xarch argument: '%0', options requiring arguments are unsupported">; def err_drv_invalid_Xarch_argument_isdriver : Error< "invalid Xarch argument: '%0', cannot change driver behavior inside Xarch argument">; +def err_drv_Xopenmp_target_missing_triple : Error< + "cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=">; +def err_drv_invalid_Xopenmp_target_with_args : Error< + "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; def err_drv_argument_not_allowed_with : Error< Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -459,6 +459,10 @@ HelpText<"Pass to fatbinary invocation">, MetaVarName<"">; def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">, HelpText<"Pass to the ptxas assembler">, MetaVarName<"">; +def Xopenmp_target : Separate<["-"], "Xopenmp-target">, + HelpText<"Pass to the target offloading toolchain.">, MetaVarName<"">; +def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">, + HelpText<"Pass to the specified target offloading toolchain. 
The triple that identifies the toolchain must be provided after the equals sign.">, MetaVarName<"">; def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>, HelpText<"Pass -z to the linker">, MetaVarName<"">, Group; Index: include/clang/Driver/ToolChain.h =================================================================== --- include/clang/Driver/ToolChain.h +++ include/clang/Driver/ToolChain.h @@ -217,6 +217,17 @@ return nullptr; } + /// TranslateOpenMPTargetArgs - Create a new derived argument list + /// that contains the OpenMP target specific flags passed via + /// -Xopenmp-target -opt=val OR -Xopenmp-target=<triple> -opt=val + /// Translation occurs only when the \p DeviceOffloadKind is specified. + /// + /// \param DeviceOffloadKind - The device offload kind used for the + /// translation. + virtual llvm::opt::DerivedArgList * + TranslateOpenMPTargetArgs(const llvm::opt::DerivedArgList &Args, + Action::OffloadKind DeviceOffloadKind) const; + /// Choose a tool to use to handle the action \p JA. /// /// This can be overridden when a particular ToolChain needs to use Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -59,7 +59,12 @@ DerivedArgList *&Entry = TCArgs[{TC, BoundArch, DeviceOffloadKind}]; if (!Entry) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind); + // Translate OpenMP toolchain arguments provided via the -Xopenmp-target flags. 
+ Entry = TC->TranslateOpenMPTargetArgs(*TranslatedArgs, DeviceOffloadKind); + if (!Entry) + Entry = TranslatedArgs; + + Entry = TC->TranslateArgs(*Entry, BoundArch, DeviceOffloadKind); if (!Entry) Entry = TranslatedArgs; } Index: lib/Driver/ToolChain.cpp =================================================================== --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -775,3 +775,69 @@ return VersionTuple(); } + +llvm::opt::DerivedArgList * +ToolChain::TranslateOpenMPTargetArgs(const llvm::opt::DerivedArgList &Args, + Action::OffloadKind DeviceOffloadKind) const { + if (DeviceOffloadKind == Action::OFK_OpenMP) { + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); + + // Handle -Xopenmp-target flags + for (Arg *A : Args) { + // Exclude flags which may only apply to the host toolchain. + // Do not exclude flags when the host triple (AuxTriple), + // matches the current toolchain triple. + if (A->getOption().matches(options::OPT_m_Group)) { + if (getAuxTriple() && getAuxTriple()->str() == getTriple().str()) + DAL->append(A); + continue; + } + + unsigned Index; + unsigned Prev; + bool XOpenMPTargetNoTriple = A->getOption().matches( + options::OPT_Xopenmp_target); + + if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) { + // Passing device args: -Xopenmp-target= -opt=val. + if (A->getValue(0) == getTripleString()) + Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); + else + continue; + } else if (XOpenMPTargetNoTriple) { + // Passing device args: -Xopenmp-target -opt=val. + Index = Args.getBaseArgs().MakeIndex(A->getValue(0)); + } else { + DAL->append(A); + continue; + } + + // Parse the argument to -Xopenmp-target. 
+ Prev = Index; + std::unique_ptr XOpenMPTargetArg(Opts.ParseOneArg(Args, Index)); + if (!XOpenMPTargetArg || Index > Prev + 1) { + getDriver().Diag(diag::err_drv_invalid_Xopenmp_target_with_args) + << A->getAsString(Args); + continue; + } + if (XOpenMPTargetNoTriple && XOpenMPTargetArg && + Args.getAllArgValues( + options::OPT_fopenmp_targets_EQ).size() != 1) { + getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple); + continue; + } + XOpenMPTargetArg->setBaseArg(A); + A = XOpenMPTargetArg.release(); + + // Ignore all but last -march=arch flag. + if (A->getOption().matches(options::OPT_march_EQ)) + DAL->eraseArg(options::OPT_march_EQ); + DAL->append(A); + } + + return DAL; + } + + return nullptr; +} Index: lib/Driver/ToolChains/Cuda.cpp =================================================================== --- lib/Driver/ToolChains/Cuda.cpp +++ lib/Driver/ToolChains/Cuda.cpp @@ -212,8 +212,20 @@ static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); + StringRef GPUArchName; + std::vector GPUArchNames; + // If this is an OpenMP action we need to extract the device architecture from + // the -march= option. + if (JA.isDeviceOffloading(Action::OFK_OpenMP)) { + GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ); + assert(GPUArchNames.size() == 1 && + "Exactly one GPU Arch required for ptxas."); + GPUArchName = GPUArchNames[0]; + } else + GPUArchName = JA.getOffloadingArch(); + // Obtain architecture from the action. - CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch()); + CudaArch gpu_arch = StringToCudaArch(GPUArchName); assert(gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an architecture."); @@ -405,7 +417,7 @@ // For OpenMP device offloading, append derived arguments. Make sure // flags are not duplicated. - // TODO: Append the compute capability. + // Also append the compute capability. 
if (DeviceOffloadKind == Action::OFK_OpenMP) { for (Arg *A : Args){ bool IsDuplicate = false; @@ -418,6 +430,15 @@ if (!IsDuplicate) DAL->append(A); } + + auto MArchList = DAL->getAllArgValues(options::OPT_march_EQ); + assert(MArchList.size() < 2 && + "Too many archs under -Xopenmp-targets"); + if (MArchList.empty()) + // Default compute capability for CUDA toolchain is sm_20. + DAL->AddJoinedArg(nullptr, + Opts.getOption(options::OPT_march_EQ), "sm_20"); + return DAL; } Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -597,3 +597,35 @@ // RUN: | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s // CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le--linux" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" + +/// ########################################################################### + +/// Check -Xopenmp-target=powerpc64le-ibm-linux-gnu -mcpu=pwr7 is passed when compiling for the device. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -mcpu=pwr7 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s + +// CHK-FOPENMP-EQ-TARGET: clang{{.*}} "-target-cpu" "pwr7" + +/// ########################################################################### + +/// Check -Xopenmp-target -mcpu=pwr7 is passed when compiling for the device. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -mcpu=pwr7 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s + +// CHK-FOPENMP-TARGET: clang{{.*}} "-target-cpu" "pwr7" + +/// ########################################################################### + +/// Check -Xopenmp-target triggers error when multiple triples are used. 
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-unknown-linux-gnu -Xopenmp-target -mcpu=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-AMBIGUOUS-ERROR %s + +// CHK-FOPENMP-TARGET-AMBIGUOUS-ERROR: clang{{.*}} error: cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target= + +/// ########################################################################### + +/// Check -Xopenmp-target triggers error when an option requiring arguments is passed to it. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -Xopenmp-target -mcpu=pwr8 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-NESTED-ERROR %s + +// CHK-FOPENMP-TARGET-NESTED-ERROR: clang{{.*}} error: invalid -Xopenmp-target argument: '-Xopenmp-target -Xopenmp-target', options requiring arguments are unsupported