Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -466,6 +466,10 @@
   HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
   Group<StaticAnalyzer_Group>;
 def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>;
+def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[DriverOption]>,
+  HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
+def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[DriverOption]>,
+  HelpText<"Pass <arg> to the CUDA/HIP device compilation">, MetaVarName<"<arg>">;
 def Xassembler : Separate<["-"], "Xassembler">,
   HelpText<"Pass <arg> to the assembler">, MetaVarName<"<arg>">,
   Group<CompilationInfo_Group>;
Index: clang/include/clang/Driver/ToolChain.h
===================================================================
--- clang/include/clang/Driver/ToolChain.h
+++ clang/include/clang/Driver/ToolChain.h
@@ -296,10 +296,20 @@
       SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
 
   /// Append the argument following \p A to \p DAL assuming \p A is an Xarch
-  /// argument.
-  virtual void TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
-                                  llvm::opt::Arg *&A,
-                                  llvm::opt::DerivedArgList *DAL) const;
+  /// argument. If \p AllocatedArgs is null pointer, synthesized arguments are
+  /// added to \p DAL, otherwise they are appended to \p AllocatedArgs.
+  virtual void TranslateXarchArgs(
+      const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
+      llvm::opt::DerivedArgList *DAL,
+      SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs = nullptr) const;
+
+  /// Translate -Xarch_ arguments. If there are no such arguments, return
+  /// a null pointer, otherwise return a DerivedArgList containing the
+  /// translated arguments.
+  virtual llvm::opt::DerivedArgList *
+  TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+                     Action::OffloadKind DeviceOffloadKind,
+                     SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const;
 
   /// Choose a tool to use to handle the action \p JA.
   ///
Index: clang/lib/Driver/Compilation.cpp
===================================================================
--- clang/lib/Driver/Compilation.cpp
+++ clang/lib/Driver/Compilation.cpp
@@ -76,16 +76,31 @@
           *TranslatedArgs, SameTripleAsHost, AllocatedArgs);
     }
 
+    // Translate -Xarch_ options first. A null result means there were none,
+    // so the input arguments are used unchanged.
+    DerivedArgList *NewDAL = nullptr;
     if (!OpenMPArgs) {
+      NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
+                                      DeviceOffloadKind, &AllocatedArgs);
+    } else {
+      NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind,
+                                      &AllocatedArgs);
+      if (!NewDAL)
+        NewDAL = OpenMPArgs;
+      else
+        delete OpenMPArgs;
+    }
+
+    if (!NewDAL) {
       Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
       if (!Entry)
         Entry = TranslatedArgs;
     } else {
-      Entry = TC->TranslateArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind);
+      Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind);
       if (!Entry)
-        Entry = OpenMPArgs;
+        Entry = NewDAL;
       else
-        delete OpenMPArgs;
+        delete NewDAL;
     }
 
     // Add allocated arguments to the final DAL.
Index: clang/lib/Driver/ToolChain.cpp
===================================================================
--- clang/lib/Driver/ToolChain.cpp
+++ clang/lib/Driver/ToolChain.cpp
@@ -1103,11 +1103,17 @@
   return nullptr;
 }
 
-void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
-                                   llvm::opt::Arg *&A,
-                                   llvm::opt::DerivedArgList *DAL) const {
+void ToolChain::TranslateXarchArgs(
+    const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
+    llvm::opt::DerivedArgList *DAL,
+    SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
   const OptTable &Opts = getDriver().getOpts();
-  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
+  unsigned ValuePos = 1;
+  if (A->getOption().matches(options::OPT_Xarch_device) ||
+      A->getOption().matches(options::OPT_Xarch_host))
+    ValuePos = 0;
+
+  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
   unsigned Prev = Index;
   std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
 
@@ -1130,5 +1136,52 @@
   }
   XarchArg->setBaseArg(A);
   A = XarchArg.release();
-  DAL->AddSynthesizedArg(A);
+  if (!AllocatedArgs)
+    DAL->AddSynthesizedArg(A);
+  else
+    AllocatedArgs->push_back(A);
+}
+
+llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
+    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+    Action::OffloadKind OFK,
+    SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
+  // Collect the resulting arguments in a new list; it is returned only if an
+  // -Xarch option was translated or dropped.
+  DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
+  bool Modified = false;
+
+  bool IsGPU = OFK == Action::OFK_Cuda || OFK == Action::OFK_HIP;
+  for (Arg *A : Args) {
+    bool NeedTrans = false;
+    bool Skip = false;
+    if (A->getOption().matches(options::OPT_Xarch_device)) {
+      NeedTrans = IsGPU;
+      Skip = !IsGPU;
+    } else if (A->getOption().matches(options::OPT_Xarch_host)) {
+      NeedTrans = !IsGPU;
+      Skip = IsGPU;
+    } else if (A->getOption().matches(options::OPT_Xarch__) && IsGPU) {
+      // Do not translate -Xarch_ options for non CUDA/HIP toolchain since
+      // they may need special translation.
+      // Skip this argument unless the architecture matches BoundArch
+      if (BoundArch.empty() || A->getValue(0) != BoundArch)
+        Skip = true;
+      else
+        NeedTrans = true;
+    }
+    if (NeedTrans || Skip)
+      Modified = true;
+    if (NeedTrans)
+      TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
+    if (!Skip)
+      DAL->append(A);
+  }
+
+  if (Modified)
+    return DAL;
+
+  // Nothing changed: discard the copy and let the caller keep using Args.
+  delete DAL;
+  return nullptr;
 }
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -800,12 +800,6 @@
   }
 
   for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_Xarch__)) {
-      // Skip this argument unless the architecture matches BoundArch
-      if (BoundArch.empty() || A->getValue(0) != BoundArch)
-        continue;
-      TranslateXarchArgs(Args, A, DAL);
-    }
     DAL->append(A);
   }
 
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -378,12 +378,6 @@
   const OptTable &Opts = getDriver().getOpts();
 
   for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_Xarch__)) {
-      // Skip this argument unless the architecture matches BoundArch.
-      if (BoundArch.empty() || A->getValue(0) != BoundArch)
-        continue;
-      TranslateXarchArgs(Args, A, DAL);
-    }
     DAL->append(A);
   }
 
Index: clang/test/Driver/hip-options.hip
===================================================================
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -13,3 +13,16 @@
 // RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
 // RUN: FileCheck -check-prefix=MLLVM %s
 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
+
+// RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
+// RUN: -Xarch_device -fcf-protection=branch \
+// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
+// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
+// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
+// DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
+
+// RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
+// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
+// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
+// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
+// HOST: clang{{.*}} "-debug-info-kind={{.*}}"