Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -73,6 +73,7 @@ InGroup; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; +def err_drv_bad_target_id : Error<"Invalid target id: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< Index: clang/include/clang/Basic/OffloadArch.h =================================================================== --- /dev/null +++ clang/include/clang/Basic/OffloadArch.h @@ -0,0 +1,41 @@ +//===--- OffloadArch.h - Utilities for offload arch -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OFFLOAD_ARCH_H +#define LLVM_CLANG_BASIC_OFFLOAD_ARCH_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" + +namespace clang { + +/// Get all feature strings that can be used in offload arch for \m Device. +/// Offload arch is a device name with optional offload arch feature strings +/// postfixed by a plus or minus sign delimited by colons, e.g. +/// gfx908:xnack+:sramecc-. Each device have limited +/// number of predefined offload arch features which have to follow predefined +/// order when showing up in a offload arch. +const llvm::SmallVector +getAllPossibleOffloadArchFeatures(llvm::StringRef Device); + +/// Parse an offload arch to get GPU arch and feature map. +/// Returns GPU arch. +/// Returns offload arch features in \p FeatureMap if it is not null pointer. +/// This function assumes \p OffloadArch is a valid offload arch. +/// If the offload arch contains feature+, map it to true. +/// If the offload arch contains feature-, map it to false. +/// If the offload arch does not contain a feature (default), do not map it. +/// Returns whether the offload arch features are valid in \p IsValid if it +/// is not a null pointer. +llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch, + llvm::StringMap *FeatureMap = nullptr, + bool *IsValid = nullptr); + +} // namespace clang + +#endif Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1032,6 +1032,9 @@ return Triple; } + /// Returns the target id if supported. + virtual llvm::Optional getTargetId() const { return llvm::None; } + const llvm::DataLayout &getDataLayout() const { assert(DataLayout && "Uninitialized DataLayout!"); return *DataLayout; Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -572,7 +572,10 @@ def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>, HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[DriverOption]>, - HelpText<"CUDA/HIP offloading device architecture (e.g. sm_35, gfx906). May be specified more than once.">; + HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target id in the form of a " + "device architecture followed by target id features delimited by a colon. Each target id feature " + "is a pre-defined string followed by a plus or minus sign (e.g. gfx908:xnack+:sramecc-). May be " + "specified more than once.">; def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>, Alias; def hip_link : Flag<["--"], "hip-link">, Index: clang/lib/Basic/CMakeLists.txt =================================================================== --- clang/lib/Basic/CMakeLists.txt +++ clang/lib/Basic/CMakeLists.txt @@ -53,6 +53,7 @@ LangStandards.cpp Module.cpp ObjCRuntime.cpp + OffloadArch.cpp OpenMPKinds.cpp OperatorPrecedence.cpp SanitizerBlacklist.cpp Index: clang/lib/Basic/OffloadArch.cpp =================================================================== --- /dev/null +++ clang/lib/Basic/OffloadArch.cpp @@ -0,0 +1,66 @@ +//===--- OffloadArch.cpp - Utilities for parsing offload arch -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/OffloadArch.h" +#include "llvm/Support/raw_ostream.h" +namespace clang { + +const llvm::SmallVector +getAllPossibleOffloadArchFeatures(llvm::StringRef Device) { + llvm::SmallVector Ret; + if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" || + Device.startswith("gfx10")) + Ret.push_back("xnack"); + if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909") + Ret.push_back("sramecc"); + return Ret; +} + +llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch, + llvm::StringMap *FeatureMap, + bool *IsValid) { + llvm::StringRef ArchStr; + auto SetValid = [&](bool Valid) { + if (IsValid) + *IsValid = Valid; + return ArchStr; + }; + + auto Split = OffloadArch.split(':'); + ArchStr = Split.first; + if (!FeatureMap && !IsValid) + return ArchStr; + + auto Features = Split.second; + if (Features.empty()) + return SetValid(true); + + auto AllFeatures = getAllPossibleOffloadArchFeatures(ArchStr); + unsigned CurIndex = 0; + while (!Features.empty()) { + auto Splits = Features.split(':'); + auto Sign = Splits.first.back(); + auto Feature = Splits.first.drop_back(); + llvm::errs() << Feature << " " << Sign << " " << Splits.second << '\n'; + if (Sign != '+' && Sign != '-') + return SetValid(false); + bool IsOn = Sign == '+'; + for (; CurIndex < AllFeatures.size(); ++CurIndex) { + if (Feature == AllFeatures[CurIndex]) { + if (FeatureMap) + (*FeatureMap)[Feature] = IsOn; + break; + } + } + if (CurIndex == AllFeatures.size()) + return SetValid(false); + Features = Splits.second; + } + return SetValid(true); +}; +} // namespace clang Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/StringSet.h" @@ -41,6 +42,14 @@ llvm::AMDGPU::GPUKind GPUKind; unsigned GPUFeatures; + /// Target id is device name followed by optional feature name postfixed + /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sramecc-. + /// If the target id contains +feature, map it to true. + /// If the target id contains -feature, map it to false. + /// If the target id does not contain a feature (default), do not map it. + llvm::StringMap OffloadArchFeatures; + std::string TargetId; + bool hasFP64() const { return getTriple().getArch() == llvm::Triple::amdgcn || !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); @@ -361,6 +370,37 @@ void setAuxTarget(const TargetInfo *Aux) override; bool hasExtIntType() const override { return true; } + + // Record offload arch features since they are needed for defining the + // pre-defined macros. + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + for (auto &F : Features) { + assert(F.front() == '+' || F.front() == '-'); + bool IsOn = F.front() == '+'; + StringRef Name = StringRef(F).drop_front(); + if (Name == "sram-ecc") + Name = "sramecc"; + if (Name != "xnack" && Name != "sramecc") + continue; + assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end()); + OffloadArchFeatures[Name] = IsOn; + } + return true; + } + + Optional getTargetId() const override { + if (!isAMDGCN(getTriple())) + return llvm::None; + StringRef CanonName = getArchNameAMDGCN(GPUKind); + std::string TargetId = CanonName.str(); + for (auto F : getAllPossibleOffloadArchFeatures(CanonName)) { + auto Loc = OffloadArchFeatures.find(F); + if (Loc != OffloadArchFeatures.end()) + TargetId = TargetId + ':' + F.str() + (Loc->second ? "+" : "-"); + } + return TargetId; + } }; } // namespace targets Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -334,6 +334,20 @@ StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); + if (isAMDGCN(getTriple())) { + Builder.defineMacro("__amdgcn_processor__", + Twine("\"") + Twine(CanonName) + Twine("\"")); + Builder.defineMacro("__amdgcn_target_id__", + Twine("\"") + Twine(getTargetId().getValue()) + + Twine("\"")); + for (auto F : getAllPossibleOffloadArchFeatures(CanonName)) { + auto Loc = OffloadArchFeatures.find(F); + if (Loc != OffloadArchFeatures.end()) { + Builder.defineMacro(Twine("__amdgcn_") + Twine(F) + Twine("__"), + Loc->second ? "1" : "0"); + } + } + } } // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -600,6 +600,14 @@ llvm::DenormalMode::IEEE); } + if (auto TargetId = getTarget().getTargetId()) + getModule().addModuleFlag( + llvm::Module::MergeTargetId, "target-id", + llvm::MDString::get( + getModule().getContext(), + (Twine(getTriple().str()) + Twine("-") + Twine(TargetId.getValue())) + .str())); + // Emit OpenCL specific module metadata: OpenCL/SPIR version. if (LangOpts.OpenCL) { EmitOpenCLMetadata(); Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -45,6 +45,7 @@ #include "ToolChains/TCE.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -2355,8 +2356,20 @@ bool EmitLLVM = false; bool EmitAsm = false; + /// Id to identify each device compilation. For CUDA it is simply the + /// GPU arch string. For HIP it is either the GPU arch string or GPU + /// arch string plus feature strings delimited by a plus sign, e.g. + /// gfx906+xnack. + struct TargetId { + /// Target id string which is persistent throughout the compilation. + const char *Id; + TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); } + TargetId(const char *Id) : Id(Id) {} + operator const char *() { return Id; } + operator StringRef() { return StringRef(Id); } + }; /// List of GPU architectures to use in this compilation. - SmallVector GpuArchList; + SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; @@ -2439,7 +2452,7 @@ for (auto Arch : GpuArchList) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch), + UA->registerDependentActionInfo(ToolChains[0], Arch, AssociatedOffloadKind); } return ABRT_Success; @@ -2450,10 +2463,9 @@ void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + auto AddTopLevel = [&](Action *A, TargetId TargetId) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), - AssociatedOffloadKind); + Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind); AL.push_back(C.MakeAction(Dep, A->getType())); }; @@ -2481,6 +2493,8 @@ CudaDeviceActions.clear(); } + virtual bool IsValidOffloadArch(StringRef Arch) const = 0; + bool initialize() override { assert(AssociatedOffloadKind == Action::OFK_Cuda || AssociatedOffloadKind == Action::OFK_HIP); @@ -2528,7 +2542,7 @@ EmitAsm = Args.getLastArg(options::OPT_S); // Collect all cuda_gpu_arch parameters, removing duplicates. - std::set GpuArchs; + std::set GpuArchs; bool Error = false; for (Arg *A : Args) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || @@ -2542,21 +2556,19 @@ GpuArchs.clear(); continue; } - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + if (!IsValidOffloadArch(ArchStr)) { Error = true; } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(Arch); + GpuArchs.insert(ArchStr); else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(Arch); + GpuArchs.erase(ArchStr); else llvm_unreachable("Unexpected option."); } // Collect list of GPUs remaining in the set. - for (CudaArch Arch : GpuArchs) - GpuArchList.push_back(Arch); + for (auto Arch : GpuArchs) + GpuArchList.push_back(Arch.data()); // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if @@ -2578,6 +2590,15 @@ DefaultCudaArch = CudaArch::SM_20; } + bool IsValidOffloadArch(StringRef ArchStr) const override { + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + return true; + } + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2637,8 +2658,7 @@ for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); + DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction(DDep, A->getType())); } @@ -2697,6 +2717,22 @@ bool canUseBundlerUnbundler() const override { return true; } + bool IsValidOffloadArch(StringRef IdStr) const override { + bool IsValid; + const StringRef ArchStr = + parseOffloadArch(IdStr, /*FeatureMap=*/nullptr, &IsValid); + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + if (!IsValid) { + C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; + return false; + } + return true; + }; + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2738,8 +2774,8 @@ // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } @@ -2795,8 +2831,8 @@ for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); ++I; } } Index: clang/lib/Driver/ToolChains/AMDGPU.h =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.h +++ clang/lib/Driver/ToolChains/AMDGPU.h @@ -221,6 +221,14 @@ static bool isWave64(const llvm::opt::ArgList &DriverArgs, llvm::AMDGPU::GPUKind Kind); + +protected: + /// Translate -mcpu option containing target id to cc1 options. + /// Returns the GPU name. + StringRef translateTargetId(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + + StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const; }; class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -9,6 +9,7 @@ #include "AMDGPU.h" #include "CommonArgs.h" #include "InputInfo.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" #include "llvm/Option/ArgList.h" @@ -254,16 +255,22 @@ DerivedArgList *DAL = Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); - // Do nothing if not OpenCL (-x cl) - if (!Args.getLastArgValue(options::OPT_x).equals("cl")) - return DAL; + const OptTable &Opts = getDriver().getOpts(); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); - for (auto *A : Args) - DAL->append(A); + for (auto *A : Args) { + // Clang always pass -mcpu to clang -cc1 by -target-cpu. For translating + // target id, we have to pass it by -march. + if (A->getOption().matches(options::OPT_mcpu_EQ)) + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + Args.getLastArgValue(options::OPT_mcpu_EQ)); + else + DAL->append(A); + } - const OptTable &Opts = getDriver().getOpts(); + if (!Args.getLastArgValue(options::OPT_x).equals("cl")) + return DAL; // Phase 1 (.cl -> .bc) if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { @@ -308,7 +315,8 @@ if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { - auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch()); + auto Kind = + llvm::AMDGPU::parseArchAMDGCN(parseOffloadArch(JA.getOffloadingArch())); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, @@ -318,7 +326,7 @@ return llvm::DenormalMode::getIEEE(); } - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); // TODO: There are way too many flags that change this. Do we need to check @@ -352,6 +360,8 @@ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { + // Allow using target id in -mcpu. + translateTargetId(DriverArgs, CC1Args); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, @@ -362,6 +372,56 @@ } } +StringRef +AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { + assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) && + "-mcpu should have been translated to -march"); + return parseOffloadArch(DriverArgs.getLastArgValue(options::OPT_march_EQ)); +} + +StringRef +AMDGPUToolChain::translateTargetId(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + StringRef GpuArch; + llvm::StringMap FeatureMap; + assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) && + "-mcpu should have been translated to -march"); + StringRef TargetId = DriverArgs.getLastArgValue(options::OPT_march_EQ); + if (TargetId.empty()) + return GpuArch; + + bool IsValid; + GpuArch = parseOffloadArch(TargetId, &FeatureMap, &IsValid); + if (!IsValid) { + getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetId; + return GpuArch; + } + + if (GpuArch.empty()) + return GpuArch; + + CC1Args.push_back("-target-cpu"); + CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); + + // Iterate through all possible target id features for the given GPU. + // If it is mapped to true, pass -mfeature to clang -cc1. + // If it is mapped to false, pass -mno-feature to clang -cc1. + // If it is not in the map (default), do not pass it to clang -cc1. + for (auto Feature : getAllPossibleOffloadArchFeatures(GpuArch)) { + auto Pos = FeatureMap.find(Feature); + if (Pos == FeatureMap.end()) + continue; + CC1Args.push_back("-target-feature"); + auto FeatureName = Feature; + if (Feature == "sramecc") + FeatureName = "sram-ecc"; + std::string Opt = (Twine(Pos->second ? "+" : "-") + FeatureName).str(); + CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt)); + } + + return GpuArch; +} + void ROCMToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { @@ -383,7 +443,7 @@ } // Get the device name and canonicalize it - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -11,6 +11,7 @@ #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -117,6 +118,7 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + SubArchName = parseOffloadArch(SubArchName); // Construct opt command. ArgStringList OptArgs; // The input to opt is the output from llvm-link. @@ -145,6 +147,7 @@ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName, bool OutputIsAsm) const { + SubArchName = parseOffloadArch(SubArchName); // Construct llc command. ArgStringList LlcArgs; // The input to llc is the output from opt. @@ -281,7 +284,8 @@ Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + // Allow using target id in --offload-arch. + StringRef GpuArch = translateTargetId(DriverArgs, CC1Args); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && @@ -289,8 +293,6 @@ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); - CC1Args.push_back("-target-cpu"); - CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, @@ -391,6 +393,7 @@ } if (!BoundArch.empty()) { + DAL->eraseArg(options::OPT_mcpu_EQ); DAL->eraseArg(options::OPT_march_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch); } Index: clang/test/CodeGenCUDA/target-id.hip =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/target-id.hip @@ -0,0 +1,13 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -target-cpu gfx908 -target-feature +xnack \ +// RUN: -target-feature -sramecc \ +// RUN: -emit-llvm -o - %s | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: !{i32 8, !"target-id", !"amdgcn-amd-amdhsa-gfx908:xnack+:sramecc-"} +__global__ void foo() {} Index: clang/test/CodeGenOpenCL/target-id.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/target-id.cl @@ -0,0 +1,11 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -target-cpu gfx908 -target-feature +xnack \ +// RUN: -target-feature -sramecc \ +// RUN: -emit-llvm -o - %s | FileCheck %s + +// CHECK: !{i32 8, !"target-id", !"amdgcn-amd-amdhsa-gfx908:xnack+:sramecc-"} +kernel void foo() {} Index: clang/test/Driver/amdgpu-macros.cl =================================================================== --- clang/test/Driver/amdgpu-macros.cl +++ clang/test/Driver/amdgpu-macros.cl @@ -310,3 +310,24 @@ // GFX1010-DAG: #define __gfx1010__ 1 // GFX1011-DAG: #define __gfx1011__ 1 // GFX1012-DAG: #define __gfx1012__ 1 + +// GFX600-DAG: #define __amdgcn_processor__ "gfx600" +// GFX601-DAG: #define __amdgcn_processor__ "gfx601" +// GFX700-DAG: #define __amdgcn_processor__ "gfx700" +// GFX701-DAG: #define __amdgcn_processor__ "gfx701" +// GFX702-DAG: #define __amdgcn_processor__ "gfx702" +// GFX703-DAG: #define __amdgcn_processor__ "gfx703" +// GFX704-DAG: #define __amdgcn_processor__ "gfx704" +// GFX801-DAG: #define __amdgcn_processor__ "gfx801" +// GFX802-DAG: #define __amdgcn_processor__ "gfx802" +// GFX803-DAG: #define __amdgcn_processor__ "gfx803" +// GFX810-DAG: #define __amdgcn_processor__ "gfx810" +// GFX900-DAG: #define __amdgcn_processor__ "gfx900" +// GFX902-DAG: #define __amdgcn_processor__ "gfx902" +// GFX904-DAG: #define __amdgcn_processor__ "gfx904" +// GFX906-DAG: #define __amdgcn_processor__ "gfx906" +// GFX908-DAG: #define __amdgcn_processor__ "gfx908" +// GFX909-DAG: #define __amdgcn_processor__ "gfx909" +// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010" +// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011" +// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012" Index: clang/test/Driver/invalid-target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/invalid-target-id.hip @@ -0,0 +1,62 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Unsupported CUDA gpu architecture + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:sramecc+:xnack+ \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target id: gfx908:sramecc+:xnack+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:unknown+ \ +// RUN: --offload-arch=gfx908+sramecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target id: gfx908:unknown+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:sramecc+:unknown+ \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target id: gfx908:sramecc+:unknown+ + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900:xnack+ \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target id: gfx900:xnack+ + +/ RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900:xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s + +// NOSIGN: error: Invalid target id: gfx900:xnack + +/ RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s + +// NOCOLON: error: Unsupported CUDA gpu architecture Index: clang/test/Driver/invalid-target-id.cl =================================================================== --- /dev/null +++ clang/test/Driver/invalid-target-id.cl @@ -0,0 +1,45 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: unknown target CPU 'gfx908xnack' + +// RUN: not %clang -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:sramecc+:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target id: gfx908:sramecc+:xnack+ + +// RUN: not %clang -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target id: gfx908:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:sramecc+:unknown+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target id: gfx908:sramecc+:unknown+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target id: gfx900:xnack+ + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900:xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s + +// NOSIGN: error: Invalid target id: gfx900:xnack + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900+xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s + +// NOCOLON: error: unknown target CPU 'gfx900+xnack' Index: clang/test/Driver/target-id-macros.hip =================================================================== --- /dev/null +++ clang/test/Driver/target-id-macros.hip @@ -0,0 +1,12 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \ +// RUN: --offload-arch=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// CHECK-DAG: #define __amdgcn_processor__ "gfx908" +// CHECK-DAG: #define __amdgcn_xnack__ 1 +// CHECK-DAG: #define __amdgcn_sramecc__ 0 +// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:xnack+:sramecc-" Index: clang/test/Driver/target-id-macros.cl =================================================================== --- /dev/null +++ clang/test/Driver/target-id-macros.cl @@ -0,0 +1,20 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// RUN: %clang -E -dM -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// RUN: %clang -E -dM -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// CHECK-DAG: #define __amdgcn_processor__ "gfx908" +// CHECK-DAG: #define __amdgcn_xnack__ 1 +// CHECK-DAG: #define __amdgcn_sramecc__ 0 +// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:xnack+:sramecc-" Index: clang/test/Driver/target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/target-id.hip @@ -0,0 +1,51 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908:xnack+:sramecc+ \ +// RUN: --offload-arch=gfx908:xnack+:sramecc- \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-fcuda-is-device" + +// CHECK: [[OPT:".*opt"]] {{.*}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]] + +// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" +// CHECK-SAME: {{.*}} "-target-feature" "+xnack" +// CHECK-SAME: {{.*}} "-target-feature" "+sram-ecc" + +// CHECK: [[OPT]] {{.*"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908:xnack\+:sramecc\+.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XE_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" +// CHECK-SAME: {{.*}} "-target-feature" "+xnack" +// CHECK-SAME: {{.*}} "-target-feature" "-sram-ecc" + +// CHECK: [[OPT]] {{.*}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908:xnack\+:sramecc\-.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XN_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" + + +// CHECK: {{".*clang-offload-bundler"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908:xnack+:sramecc+,hip-amdgcn-amd-amdhsa-gfx908:xnack+:sramecc-" Index: clang/test/Driver/target-id.cl =================================================================== --- /dev/null +++ clang/test/Driver/target-id.cl @@ -0,0 +1,21 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908:xnack+:sramecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + + +// RUN: %clang -### -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908:xnack+:sramecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + + +// RUN: %clang -### -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908:xnack+:sramecc- \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + +// CHECK: "-target-cpu" "gfx908" +// CHECK-SAME: "-target-feature" "+xnack" +// CHECK-SAME: "-target-feature" "-sram-ecc"