Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -73,6 +73,7 @@ InGroup; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; +def err_drv_bad_target_id : Error<"Invalid target id: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< Index: clang/include/clang/Basic/OffloadArch.h =================================================================== --- /dev/null +++ clang/include/clang/Basic/OffloadArch.h @@ -0,0 +1,41 @@ +//===--- OffloadArch.h - Utilities for offload arch -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OFFLOAD_ARCH_H +#define LLVM_CLANG_BASIC_OFFLOAD_ARCH_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" + +namespace clang { + +/// Get all feature strings that can be used in offload arch for \m Device. +/// Offload arch is a device name with optional offload arch feature strings +/// delimite by a plus or minus sign, e.g. gfx906+xnack-sramecc. Offload arch +/// feature itsel cannot contain plus or minus sign. Each device have limited +/// number of predefined offload arch features which have to follow predefined +/// order when showing up in a offload arch. +const llvm::SmallVector +getAllPossibleOffloadArchFeatures(llvm::StringRef Device); + +/// Parse an offload arch to get GPU arch and feature map. +/// Returns GPU arch. +/// Returns offload arch features in \p FeatureMap if it is not null pointer. +/// This function assumes \p OffloadArch is a valid offload arch. +/// If the offload arch contains +feature, map it to true. +/// If the offload arch contains -feature, map it to false. +/// If the offload arch does not contain a feature (default), do not map it. +/// Returns whether the offload arch features are valid in \p IsValid if it +/// is not a null pointer. +llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch, + llvm::StringMap *FeatureMap = nullptr, + bool *IsValid = nullptr); + +} // namespace clang + +#endif Index: clang/lib/Basic/CMakeLists.txt =================================================================== --- clang/lib/Basic/CMakeLists.txt +++ clang/lib/Basic/CMakeLists.txt @@ -53,6 +53,7 @@ LangStandards.cpp Module.cpp ObjCRuntime.cpp + OffloadArch.cpp OpenMPKinds.cpp OperatorPrecedence.cpp SanitizerBlacklist.cpp Index: clang/lib/Basic/OffloadArch.cpp =================================================================== --- /dev/null +++ clang/lib/Basic/OffloadArch.cpp @@ -0,0 +1,78 @@ +//===--- OffloadArch.cpp - Utilities for parsing offload arch -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/OffloadArch.h" + +namespace clang { + +const llvm::SmallVector +getAllPossibleOffloadArchFeatures(llvm::StringRef Device) { + llvm::SmallVector Ret; + if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" || + Device.startswith("gfx10")) + Ret.push_back("xnack"); + if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909") + Ret.push_back("sramecc"); + return Ret; +} + +llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch, + llvm::StringMap *FeatureMap, + bool *IsValid) { + // Return a pair of StringRef separated by the first occurrence of '+' + // or '-'. The delimiter is returned in Delim. + auto ParseTargetId = [](llvm::StringRef S, char &Delim) { + std::pair Ret; + auto Pos = S.find_first_of("+-"); + if (Pos == S.npos) { + Ret.first = S; + return Ret; + } + Delim = S[Pos]; + Ret.first = S.substr(0, Pos); + Ret.second = S.drop_front(Pos + 1); + return Ret; + }; + char Delim; + auto Split = ParseTargetId(OffloadArch, Delim); + llvm::StringRef ArchStr = Split.first; + if (!FeatureMap && !IsValid) + return ArchStr; + + auto Features = Split.second; + if (Features.empty()) { + if (IsValid) + *IsValid = true; + return ArchStr; + } + + auto AllFeatures = getAllPossibleOffloadArchFeatures(ArchStr); + unsigned CurIndex = 0; + while (!Features.empty()) { + assert(Delim == '+' || Delim == '-'); + bool IsOn = Delim == '+'; + auto Splits = ParseTargetId(Features, Delim); + for (; CurIndex < AllFeatures.size(); ++CurIndex) { + if (Splits.first == AllFeatures[CurIndex]) { + if (FeatureMap) + (*FeatureMap)[Splits.first] = IsOn; + break; + } + } + if (CurIndex == AllFeatures.size()) { + if (IsValid) + *IsValid = false; + return ArchStr; + } + Features = Splits.second; + } + if (IsValid) + *IsValid = true; + return ArchStr; +}; +} // namespace clang Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/StringSet.h" @@ -41,6 +42,14 @@ llvm::AMDGPU::GPUKind GPUKind; unsigned GPUFeatures; + /// Features set by extended --offload-arch, -mcpu, or -target-cpu options + /// by the format of GPU name followed by optional feature name delimitted + /// by plus or minus sign, e.g. gfx908+xnack-sramecc. + /// If the offload arch contains +feature, map it to true. + /// If the offload arch contains -feature, map it to false. + /// If the offload arch does not contain a feature (default), do not map it. + llvm::StringMap OffloadArchFeatures; + bool hasFP64() const { return getTriple().getArch() == llvm::Triple::amdgcn || !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64); @@ -356,6 +365,25 @@ void setAuxTarget(const TargetInfo *Aux) override; bool hasExtIntType() const override { return true; } + + // Record offload arch features since they are needed for defining the + // pre-defined macros. + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + for (auto &F : Features) { + llvm::errs() << "Feature: " << F << '\n'; + assert(F.front() == '+' || F.front() == '-'); + bool IsOn = F.front() == '+'; + StringRef Name = StringRef(F).drop_front(); + if (Name == "sram-ecc") + Name = "sramecc"; + if (Name != "xnack" && Name != "sramecc") + continue; + assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end()); + OffloadArchFeatures[Name] = IsOn; + } + return true; + } }; } // namespace targets Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -334,6 +334,22 @@ StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); + if (isAMDGCN(getTriple())) { + Builder.defineMacro("__amdgcn_processor__", + Twine("\"") + Twine(CanonName) + Twine("\"")); + + std::string TargetId = CanonName.str(); + for (auto F : getAllPossibleOffloadArchFeatures(CanonName)) { + auto Loc = OffloadArchFeatures.find(F); + if (Loc != OffloadArchFeatures.end()) { + Builder.defineMacro(Twine("__amdgcn_") + Twine(F) + Twine("__"), + Loc->second ? "1" : "0"); + TargetId = TargetId + (Loc->second ? "+" : "-") + F.str(); + } + } + Builder.defineMacro("__amdgcn_target_id__", + Twine("\"") + Twine(TargetId) + Twine("\"")); + } } // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -45,6 +45,7 @@ #include "ToolChains/TCE.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -2355,8 +2356,20 @@ bool EmitLLVM = false; bool EmitAsm = false; + /// Id to identify each device compilation. For CUDA it is simply the + /// GPU arch string. For HIP it is either the GPU arch string or GPU + /// arch string plus feature strings delimited by a plus sign, e.g. + /// gfx906+xnack. + struct TargetId { + /// Target id string which is persistent throughout the compilation. + const char *Id; + TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); } + TargetId(const char *Id) : Id(Id) {} + operator const char *() { return Id; } + operator StringRef() { return StringRef(Id); } + }; /// List of GPU architectures to use in this compilation. - SmallVector GpuArchList; + SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; @@ -2439,7 +2452,7 @@ for (auto Arch : GpuArchList) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch), + UA->registerDependentActionInfo(ToolChains[0], Arch, AssociatedOffloadKind); } return ABRT_Success; @@ -2450,10 +2463,9 @@ void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + auto AddTopLevel = [&](Action *A, TargetId TargetId) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), - AssociatedOffloadKind); + Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind); AL.push_back(C.MakeAction(Dep, A->getType())); }; @@ -2481,6 +2493,8 @@ CudaDeviceActions.clear(); } + virtual bool IsValidOffloadArch(StringRef Arch) const = 0; + bool initialize() override { assert(AssociatedOffloadKind == Action::OFK_Cuda || AssociatedOffloadKind == Action::OFK_HIP); @@ -2528,7 +2542,7 @@ EmitAsm = Args.getLastArg(options::OPT_S); // Collect all cuda_gpu_arch parameters, removing duplicates. - std::set GpuArchs; + std::set GpuArchs; bool Error = false; for (Arg *A : Args) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || @@ -2542,21 +2556,19 @@ GpuArchs.clear(); continue; } - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + if (!IsValidOffloadArch(ArchStr)) { Error = true; } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(Arch); + GpuArchs.insert(ArchStr); else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(Arch); + GpuArchs.erase(ArchStr); else llvm_unreachable("Unexpected option."); } // Collect list of GPUs remaining in the set. - for (CudaArch Arch : GpuArchs) - GpuArchList.push_back(Arch); + for (auto Arch : GpuArchs) + GpuArchList.push_back(Arch.data()); // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if @@ -2578,6 +2590,15 @@ DefaultCudaArch = CudaArch::SM_20; } + bool IsValidOffloadArch(StringRef ArchStr) const override { + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + return true; + } + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2637,8 +2658,7 @@ for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); + DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction(DDep, A->getType())); } @@ -2697,6 +2717,22 @@ bool canUseBundlerUnbundler() const override { return true; } + bool IsValidOffloadArch(StringRef IdStr) const override { + bool IsValid; + const StringRef ArchStr = + parseOffloadArch(IdStr, /*FeatureMap=*/nullptr, &IsValid); + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + if (!IsValid) { + C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; + return false; + } + return true; + }; + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2738,8 +2774,8 @@ // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } @@ -2795,8 +2831,8 @@ for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); ++I; } } Index: clang/lib/Driver/ToolChains/AMDGPU.h =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.h +++ clang/lib/Driver/ToolChains/AMDGPU.h @@ -221,6 +221,14 @@ static bool isWave64(const llvm::opt::ArgList &DriverArgs, llvm::AMDGPU::GPUKind Kind); + +protected: + /// Translate -mcpu option containing target id to cc1 options. + /// Returns the GPU name. + StringRef translateTargetId(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + + StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const; }; class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -9,6 +9,7 @@ #include "AMDGPU.h" #include "CommonArgs.h" #include "InputInfo.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" #include "llvm/Option/ArgList.h" @@ -247,16 +248,22 @@ DerivedArgList *DAL = Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); - // Do nothing if not OpenCL (-x cl) - if (!Args.getLastArgValue(options::OPT_x).equals("cl")) - return DAL; + const OptTable &Opts = getDriver().getOpts(); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); - for (auto *A : Args) - DAL->append(A); + for (auto *A : Args) { + // Clang always pass -mcpu to clang -cc1 by -target-cpu. For translating + // target id, we have to pass it by -march. + if (A->getOption().matches(options::OPT_mcpu_EQ)) + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + Args.getLastArgValue(options::OPT_mcpu_EQ)); + else + DAL->append(A); + } - const OptTable &Opts = getDriver().getOpts(); + if (!Args.getLastArgValue(options::OPT_x).equals("cl")) + return DAL; // Phase 1 (.cl -> .bc) if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { @@ -301,7 +308,8 @@ if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { - auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch()); + auto Kind = + llvm::AMDGPU::parseArchAMDGCN(parseOffloadArch(JA.getOffloadingArch())); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, @@ -311,7 +319,7 @@ return llvm::DenormalMode::getIEEE(); } - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); // TODO: There are way too many flags that change this. Do we need to check @@ -345,6 +353,8 @@ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { + // Allow using target id in -mcpu. + translateTargetId(DriverArgs, CC1Args); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, @@ -355,6 +365,56 @@ } } +StringRef +AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { + assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) && + "-mcpu should have been translated to -march"); + return parseOffloadArch(DriverArgs.getLastArgValue(options::OPT_march_EQ)); +} + +StringRef +AMDGPUToolChain::translateTargetId(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + StringRef GpuArch; + llvm::StringMap FeatureMap; + assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) && + "-mcpu should have been translated to -march"); + StringRef TargetId = DriverArgs.getLastArgValue(options::OPT_march_EQ); + if (TargetId.empty()) + return GpuArch; + + bool IsValid; + GpuArch = parseOffloadArch(TargetId, &FeatureMap, &IsValid); + if (!IsValid) { + getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetId; + return GpuArch; + } + + if (GpuArch.empty()) + return GpuArch; + + CC1Args.push_back("-target-cpu"); + CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); + + // Iterate through all possible target id features for the given GPU. + // If it is mapped to true, pass -mfeature to clang -cc1. + // If it is mapped to false, pass -mno-feature to clang -cc1. + // If it is not in the map (default), do not pass it to clang -cc1. + for (auto Feature : getAllPossibleOffloadArchFeatures(GpuArch)) { + auto Pos = FeatureMap.find(Feature); + if (Pos == FeatureMap.end()) + continue; + CC1Args.push_back("-target-feature"); + auto FeatureName = Feature; + if (Feature == "sramecc") + FeatureName = "sram-ecc"; + std::string Opt = (Twine(Pos->second ? "+" : "-") + FeatureName).str(); + CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt)); + } + + return GpuArch; +} + void ROCMToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadingKind) const { @@ -376,7 +436,7 @@ } // Get the device name and canonicalize it - const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + const StringRef GpuArch = getGPUArch(DriverArgs); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -11,6 +11,7 @@ #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -117,6 +118,9 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + auto Pos = SubArchName.find_first_of("+-"); + if (Pos != SubArchName.npos) + SubArchName = SubArchName.substr(0, Pos); // Construct opt command. ArgStringList OptArgs; // The input to opt is the output from llvm-link. @@ -145,6 +149,9 @@ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName, bool OutputIsAsm) const { + auto Pos = SubArchName.find_first_of("+-"); + if (Pos != SubArchName.npos) + SubArchName = SubArchName.substr(0, Pos); // Construct llc command. ArgStringList LlcArgs; // The input to llc is the output from opt. @@ -281,7 +288,8 @@ Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + // Allow using target id in --offload-arch. + StringRef GpuArch = translateTargetId(DriverArgs, CC1Args); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && @@ -289,8 +297,6 @@ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); - CC1Args.push_back("-target-cpu"); - CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, @@ -391,6 +397,7 @@ } if (!BoundArch.empty()) { + DAL->eraseArg(options::OPT_mcpu_EQ); DAL->eraseArg(options::OPT_march_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch); } Index: clang/test/Driver/amdgpu-macros.cl =================================================================== --- clang/test/Driver/amdgpu-macros.cl +++ clang/test/Driver/amdgpu-macros.cl @@ -310,3 +310,24 @@ // GFX1010-DAG: #define __gfx1010__ 1 // GFX1011-DAG: #define __gfx1011__ 1 // GFX1012-DAG: #define __gfx1012__ 1 + +// GFX600-DAG: #define __amdgcn_processor__ "gfx600" +// GFX601-DAG: #define __amdgcn_processor__ "gfx601" +// GFX700-DAG: #define __amdgcn_processor__ "gfx700" +// GFX701-DAG: #define __amdgcn_processor__ "gfx701" +// GFX702-DAG: #define __amdgcn_processor__ "gfx702" +// GFX703-DAG: #define __amdgcn_processor__ "gfx703" +// GFX704-DAG: #define __amdgcn_processor__ "gfx704" +// GFX801-DAG: #define __amdgcn_processor__ "gfx801" +// GFX802-DAG: #define __amdgcn_processor__ "gfx802" +// GFX803-DAG: #define __amdgcn_processor__ "gfx803" +// GFX810-DAG: #define __amdgcn_processor__ "gfx810" +// GFX900-DAG: #define __amdgcn_processor__ "gfx900" +// GFX902-DAG: #define __amdgcn_processor__ "gfx902" +// GFX904-DAG: #define __amdgcn_processor__ "gfx904" +// GFX906-DAG: #define __amdgcn_processor__ "gfx906" +// GFX908-DAG: #define __amdgcn_processor__ "gfx908" +// GFX909-DAG: #define __amdgcn_processor__ "gfx909" +// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010" +// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011" +// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012" Index: clang/test/Driver/invalid-target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/invalid-target-id.hip @@ -0,0 +1,46 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Unsupported CUDA gpu architecture + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+sramecc+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target id: gfx908+sramecc+xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+unknown \ +// RUN: --offload-arch=gfx908+sramecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target id: gfx908+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+sramecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target id: gfx908+sramecc+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target id: gfx900+xnack Index: clang/test/Driver/invalid-target-id.cl =================================================================== --- /dev/null +++ clang/test/Driver/invalid-target-id.cl @@ -0,0 +1,33 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: unknown target CPU 'gfx908xnack' + +// RUN: not %clang -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908+sramecc+xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid target id: gfx908+sramecc+xnack + +// RUN: not %clang -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908+unknown -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid target id: gfx908+unknown + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908+sramecc+unknown -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid target id: gfx908+sramecc+unknown + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx900+xnack -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid target id: gfx900+xnack Index: clang/test/Driver/target-id-macros.hip =================================================================== --- /dev/null +++ clang/test/Driver/target-id-macros.hip @@ -0,0 +1,12 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \ +// RUN: --offload-arch=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// CHECK-DAG: #define __amdgcn_processor__ "gfx908" +// CHECK-DAG: #define __amdgcn_xnack__ 1 +// CHECK-DAG: #define __amdgcn_sramecc__ 0 +// CHECK-DAG: #define __amdgcn_target_id__ "gfx908+xnack-sramecc" Index: clang/test/Driver/target-id-macros.cl =================================================================== --- /dev/null +++ clang/test/Driver/target-id-macros.cl @@ -0,0 +1,20 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// RUN: %clang -E -dM -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// RUN: %clang -E -dM -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \ +// RUN: | FileCheck %s + +// CHECK-DAG: #define __amdgcn_processor__ "gfx908" +// CHECK-DAG: #define __amdgcn_xnack__ 1 +// CHECK-DAG: #define __amdgcn_sramecc__ 0 +// CHECK-DAG: #define __amdgcn_target_id__ "gfx908+xnack-sramecc" Index: clang/test/Driver/target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/target-id.hip @@ -0,0 +1,54 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+xnack+sramecc \ +// RUN: --offload-arch=gfx908+xnack-sramecc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-fcuda-is-device" + +// CHECK: [[OPT:".*opt"]] {{".*-gfx908-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]] + +// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908-.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" +// CHECK-SAME: {{.*}} "-target-feature" "+xnack" +// CHECK-SAME: {{.*}} "-target-feature" "+sram-ecc" + +// CHECK: [[OPT]] {{".*-gfx908\+xnack\+sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908\+xnack\+sramecc.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XE_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908\+xnack\+sramecc.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" +// CHECK-SAME: {{.*}} "-target-feature" "+xnack" +// CHECK-SAME: {{.*}} "-target-feature" "-sram-ecc" + +// CHECK: [[OPT]] {{".*-gfx908\+xnack-sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908\+xnack-sramecc.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XN_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908\+xnack-sramecc.*o"}} + + +// CHECK: {{".*clang-offload-bundler"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908+xnack+sramecc,hip-amdgcn-amd-amdhsa-gfx908+xnack-sramecc" Index: clang/test/Driver/target-id.cl =================================================================== --- /dev/null +++ clang/test/Driver/target-id.cl @@ -0,0 +1,21 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx908+xnack-sramecc \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + + +// RUN: %clang -### -target amdgcn-amd-amdpal \ +// RUN: -mcpu=gfx908+xnack-sramecc \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + + +// RUN: %clang -### -target amdgcn--mesa3d \ +// RUN: -mcpu=gfx908+xnack-sramecc \ +// RUN: -nostdlib %s 2>&1 | FileCheck %s + +// CHECK: "-target-cpu" "gfx908" +// CHECK-SAME: "-target-feature" "+xnack" +// CHECK-SAME: "-target-feature" "-sram-ecc"