diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td --- a/clang/include/clang/Driver/CC1Options.td +++ b/clang/include/clang/Driver/CC1Options.td @@ -482,6 +482,10 @@ def ast_merge : Separate<["-"], "ast-merge">, MetaVarName<"">, HelpText<"Merge the given AST file into the translation unit being compiled.">; +def aux_target_cpu : Separate<["-"], "aux-target-cpu">, + HelpText<"Target a specific auxiliary cpu type">; +def aux_target_feature : Separate<["-"], "aux-target-feature">, + HelpText<"Target specific auxiliary attributes">; def aux_triple : Separate<["-"], "aux-triple">, HelpText<"Auxiliary target triple.">; def code_completion_at : Separate<["-"], "code-completion-at">, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -549,6 +549,9 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group, Flags<[CC1Option]>, HelpText<"Assume functions may be convergent">; +def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">, + InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating " + "'-aux-target-cpu' and '-aux-target-feature'.">; def cuda_device_only : Flag<["--"], "cuda-device-only">, HelpText<"Compile CUDA code for device only">; def cuda_host_only : Flag<["--"], "cuda-host-only">, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -426,9 +426,15 @@ /// (in the format produced by -fdump-record-layouts). std::string OverrideRecordLayoutsFile; - /// Auxiliary triple for CUDA compilation. + /// Auxiliary triple for CUDA/HIP compilation. std::string AuxTriple; + /// Auxiliary target CPU for CUDA/HIP compilation. + Optional AuxTargetCPU; + + /// Auxiliary target features for CUDA/HIP compilation. + Optional> AuxTargetFeatures; + /// Filename to write statistics to. std::string StatsFile; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -309,7 +309,7 @@ static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, - bool ForAS) { + bool ForAS, bool IsAux = false) { const Driver &D = TC.getDriver(); std::vector Features; switch (Triple.getArch()) { @@ -387,7 +387,7 @@ if (Last != I) continue; - CmdArgs.push_back("-target-feature"); + CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature"); CmdArgs.push_back(Name.data()); } } @@ -4627,6 +4627,23 @@ AsynchronousUnwindTables)) CmdArgs.push_back("-munwind-tables"); + // Prepare `-aux-target-cpu` and `-aux-target-feature` unless + // `--gpu-use-aux-triple-only` is specified. + if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) && + ((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) || + (IsHIP && JA.isDeviceOffloading(Action::OFK_HIP)))) { + const ArgList &HostArgs = + C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None); + std::string HostCPU = + getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false); + if (!HostCPU.empty()) { + CmdArgs.push_back("-aux-target-cpu"); + CmdArgs.push_back(Args.MakeArgString(HostCPU)); + } + getTargetFeatures(TC, *TC.getAuxTriple(), HostArgs, CmdArgs, + /*ForAS*/ false, /*IsAux*/ true); + } + TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind()); // FIXME: Handle -mtune=. diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -923,6 +923,10 @@ !getFrontendOpts().AuxTriple.empty()) { auto TO = std::make_shared(); TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple); + if (getFrontendOpts().AuxTargetCPU) + TO->CPU = getFrontendOpts().AuxTargetCPU.getValue(); + if (getFrontendOpts().AuxTargetFeatures) + TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.getValue(); TO->HostTriple = getTarget().getTriple().str(); setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO)); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1931,6 +1931,10 @@ Opts.OverrideRecordLayoutsFile = std::string(Args.getLastArgValue(OPT_foverride_record_layout_EQ)); Opts.AuxTriple = std::string(Args.getLastArgValue(OPT_aux_triple)); + if (Args.hasArg(OPT_aux_target_cpu)) + Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu)); + if (Args.hasArg(OPT_aux_target_feature)) + Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature); Opts.StatsFile = std::string(Args.getLastArgValue(OPT_stats_file)); if (const Arg *A = Args.getLastArg(OPT_arcmt_check, diff --git a/clang/test/Driver/hip-host-cpu-features.hip b/clang/test/Driver/hip-host-cpu-features.hip new file mode 100644 --- /dev/null +++ b/clang/test/Driver/hip-host-cpu-features.hip @@ -0,0 +1,19 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -c -target x86_64-linux-gnu -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU +// RUN: %clang -### -c -target x86_64-linux-gnu -msse3 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3 +// RUN: %clang -### -c -target x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU + +// HOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa" +// HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// HOSTCPU-SAME: "-aux-target-cpu" "znver2" + +// HOSTSSE3: "-cc1" "-triple" "amdgcn-amd-amdhsa" +// HOSTSSE3-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// HOSTSSE3-SAME: "-aux-target-feature" "+sse3" + +// NOHOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa" +// NOHOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// NOHOSTCPU-NOT: "-aux-target-cpu" "znver2" diff --git a/clang/test/Preprocessor/hip-host-cpu-macros.cu b/clang/test/Preprocessor/hip-host-cpu-macros.cu new file mode 100644 --- /dev/null +++ b/clang/test/Preprocessor/hip-host-cpu-macros.cu @@ -0,0 +1,13 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +#ifdef __HIP_DEVICE_COMPILE__ +DEVICE __SSE3__ +#else +HOST __SSE3__ +#endif + +// RUN: %clang -x hip -E -target x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib -o - %s 2>&1 | FileCheck %s + +// CHECK-NOT: SSE3