Index: include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- include/clang/Basic/DiagnosticDriverKinds.td +++ include/clang/Basic/DiagnosticDriverKinds.td @@ -41,6 +41,7 @@ "--no-cuda-version-check.">; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; +def err_drv_hip_bad_target_id : Error<"Invalid HIP offloading target id: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< Index: include/clang/Basic/HIP.h =================================================================== --- /dev/null +++ include/clang/Basic/HIP.h @@ -0,0 +1,38 @@ +//===--- HIP.h - Utilities for compiling HIP code --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_HIP_H +#define LLVM_CLANG_BASIC_HIP_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" + +namespace clang { + +namespace HIP { +/// Defines all feature strings that can be used in an offloading target id +/// for each device. An offloading target id is a device name with optional +/// target feature strings delimited by a plus sign, e.g. +/// gfx906+xnack+sram-ecc. A target feature itself cannot contain a plus sign. +/// Each device has a limited number of predefined target features which +/// have to follow a predefined order when showing up in an offloading target +/// id. 
+struct OffloadingTargetIdFeatures { + llvm::StringMap> Features; + OffloadingTargetIdFeatures() { Features["gfx906"] = {"xnack", "sram-ecc"}; } +}; + +/// Get all feature strings that can be used in code object target id for +/// \p Device. +const llvm::SmallVector & +getAllPossibleTargetIdFeatures(llvm::StringRef Device); + +} // namespace HIP +} // namespace clang + +#endif Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -587,6 +587,11 @@ def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; +def offloading_target_id_EQ : Joined<["--"], "offloading-target-id=">, Flags<[DriverOption]>, + HelpText<"HIP offloading target id (e.g. gfx906+xnack). May be specified more than once.">; +def no_offloading_target_id_EQ : Joined<["--"], "no-offloading-target-id=">, Flags<[DriverOption]>, + HelpText<"Remove HIP offloading target id (e.g. gfx906+xnack) from the list of target ids to compile for." 
+ " 'all' resets the list to its default value.">; def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group, HelpText<"HIP device library path">; def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, Index: lib/Basic/CMakeLists.txt =================================================================== --- lib/Basic/CMakeLists.txt +++ lib/Basic/CMakeLists.txt @@ -48,6 +48,7 @@ FileManager.cpp FileSystemStatCache.cpp FixedPoint.cpp + HIP.cpp IdentifierTable.cpp LangOptions.cpp Module.cpp Index: lib/Basic/HIP.cpp =================================================================== --- /dev/null +++ lib/Basic/HIP.cpp @@ -0,0 +1,21 @@ +//===--- HIP.cpp - Utilities for compiling HIP code ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/HIP.h" + +namespace clang { + +namespace HIP { + +const llvm::SmallVector & +getAllPossibleTargetIdFeatures(llvm::StringRef Device) { + static OffloadingTargetIdFeatures F; + return F.Features[Device]; +} +} // namespace HIP +} // namespace clang Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -43,6 +43,7 @@ #include "ToolChains/TCE.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/HIP.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -2282,8 +2283,20 @@ bool CompileHostOnly = false; bool CompileDeviceOnly = false; + /// Id to identify each device compilation. For CUDA it is simply the + /// GPU arch string. For HIP it is either the GPU arch string or GPU + /// arch string plus feature strings delimited by a plus sign, e.g. 
+ /// gfx906+xnack. + struct OffloadingTargetId { + /// Target id string which is persistent throughout the compilation. + const char *Id; + OffloadingTargetId(CudaArch Arch) { Id = CudaArchToString(Arch); } + OffloadingTargetId(const char *Id) : Id(Id) {} + operator const char *() { return Id; } + operator StringRef() { return StringRef(Id); } + }; /// List of GPU architectures to use in this compilation. - SmallVector GpuArchList; + SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; @@ -2362,7 +2375,7 @@ for (auto Arch : GpuArchList) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch), + UA->registerDependentActionInfo(ToolChains[0], Arch, AssociatedOffloadKind); } return ABRT_Success; @@ -2373,10 +2386,9 @@ void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + auto AddTopLevel = [&](Action *A, OffloadingTargetId TargetId) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), - AssociatedOffloadKind); + Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind); AL.push_back(C.MakeAction(Dep, A->getType())); }; @@ -2482,7 +2494,7 @@ // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if // suboptimally, on all newer GPUs. 
- if (GpuArchList.empty()) + if (GpuArchList.empty() && AssociatedOffloadKind != Action::OFK_HIP) GpuArchList.push_back(CudaArch::SM_20); return Error; @@ -2556,8 +2568,7 @@ for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); + DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction(DDep, A->getType())); } @@ -2606,6 +2617,7 @@ class HIPActionBuilder final : public CudaActionBuilderBase { /// The linker inputs obtained for each device arch. SmallVector DeviceLinkerInputs; + const char *const DefaultTargetId = "gfx900"; public: HIPActionBuilder(Compilation &C, DerivedArgList &Args, @@ -2654,8 +2666,8 @@ // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } @@ -2710,11 +2722,73 @@ for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); ++I; } } + bool initialize() override { + if (CudaActionBuilderBase::initialize()) + return true; + + auto IsValidTargetId = [](StringRef IdStr) { + auto Split = IdStr.split('+'); + const StringRef ArchStr = Split.first; + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) + return false; + StringRef Features = Split.second; + if (Features.empty()) + return true; + auto &AllFeatures = HIP::getAllPossibleTargetIdFeatures(ArchStr); + unsigned CurIndex = 0; + 
while (!Features.empty()) { + auto Splits = Features.split('+'); + for (; CurIndex < AllFeatures.size(); ++CurIndex) + if (Splits.first == AllFeatures[CurIndex]) + break; + if (CurIndex == AllFeatures.size()) + return false; + ++CurIndex; // Consume the match: features are ordered and not repeatable. + Features = Splits.second; + } + return true; + }; + // Collect all offloading_target_id parameters, removing duplicates. + std::set TargetIds; + for (Arg *A : Args) { + if (!(A->getOption().matches(options::OPT_offloading_target_id_EQ) || + A->getOption().matches(options::OPT_no_offloading_target_id_EQ))) + continue; + A->claim(); + + const StringRef IdStr = A->getValue(); + if (A->getOption().matches(options::OPT_no_offloading_target_id_EQ) && + IdStr == "all") { + TargetIds.clear(); // '--no-offloading-target-id=all' resets the list. + continue; + } + if (!IsValidTargetId(IdStr)) { + C.getDriver().Diag(clang::diag::err_drv_hip_bad_target_id) << IdStr; + return true; + } else if (A->getOption().matches(options::OPT_offloading_target_id_EQ)) + TargetIds.insert(IdStr); + else if (A->getOption().matches( + options::OPT_no_offloading_target_id_EQ)) + TargetIds.erase(IdStr); + else + llvm_unreachable("Unexpected option."); + } + + // Collect list of target ids remaining in the set. + for (auto Id : TargetIds) + GpuArchList.push_back(Id.data()); + + if (GpuArchList.empty()) + GpuArchList.push_back(DefaultTargetId); + + return false; + } }; /// OpenMP action builder. 
The host bitcode is passed to the device frontend Index: lib/Driver/ToolChains/HIP.cpp =================================================================== --- lib/Driver/ToolChains/HIP.cpp +++ lib/Driver/ToolChains/HIP.cpp @@ -10,10 +10,12 @@ #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" +#include "clang/Basic/HIP.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -77,6 +79,7 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + SubArchName = SubArchName.split('+').first; // Construct opt command. ArgStringList OptArgs; // The input to opt is the output from llvm-link. @@ -125,6 +128,9 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + + SubArchName = SubArchName.split('+').first; + // Construct llc command. // FIXME: -disable-promote-alloca-to-lds is a workaround for issues in // AMDGPUPromoteAlloca pass which cause invalid memory access in PyTorch. 
@@ -263,6 +269,9 @@ HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + auto Splits = GpuArch.split('+'); + StringRef FeatureStr = Splits.second; + GpuArch = Splits.first; assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && @@ -270,6 +279,26 @@ CC1Args.push_back("-target-cpu"); CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); + + llvm::StringSet<> FeatureSet; + while (!FeatureStr.empty()) { + auto Splits = FeatureStr.split('+'); + FeatureSet.insert(Splits.first); + FeatureStr = Splits.second; + } + + // Iterate through all possible target id features for the given GPU. + // If a feature is contained in the target id, pass -m<feature> to + // clang -cc1; otherwise pass -mno-<feature> to clang -cc1. + for (auto Feature : HIP::getAllPossibleTargetIdFeatures(GpuArch)) { + std::string Opt = "-m"; + if (FeatureSet.count(Feature)) + Opt = Opt + Feature.str(); + else + Opt = Opt + "no-" + Feature.str(); + CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt)); + } + CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, Index: test/Driver/hip-invalid-offloading-target-id.hip =================================================================== --- /dev/null +++ test/Driver/hip-invalid-offloading-target-id.hip @@ -0,0 +1,48 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: --offloading-target-id=gfx906xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Invalid HIP offloading target id: gfx906xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: 
--offloading-target-id=gfx906+sram-ecc+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid HIP offloading target id: gfx906+sram-ecc+xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: --offloading-target-id=gfx906+unknown \ +// RUN: --offloading-target-id=gfx906+sram-ecc+unknown \ +// RUN: --offloading-target-id=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid HIP offloading target id: gfx906+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: --offloading-target-id=gfx906+sram-ecc+unknown \ +// RUN: --offloading-target-id=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid HIP offloading target id: gfx906+sram-ecc+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: --offloading-target-id=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid HIP offloading target id: gfx900+xnack + + Index: test/Driver/hip-offloading-target-id.hip =================================================================== --- /dev/null +++ test/Driver/hip-offloading-target-id.hip @@ -0,0 +1,55 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offloading-target-id=gfx906 \ +// RUN: --offloading-target-id=gfx906+xnack \ +// RUN: --offloading-target-id=gfx906+xnack+sram-ecc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" 
"amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx906" "-mno-xnack" "-mno-sram-ecc" + +// CHECK: [[OPT:".*opt"]] {{".*-gfx906-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx906" +// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx906-optimized.*bc"]] + +// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj" +// CHECK-SAME: {{.*}} "-mcpu=gfx906" +// CHECK-SAME: "-o" {{".*-gfx906-.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx906" "-mxnack" "-mno-sram-ecc" + +// CHECK: [[OPT]] {{".*-gfx906\+xnack.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx906" +// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx906\+xnack.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XN_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj" +// CHECK-SAME: {{.*}} "-mcpu=gfx906" +// CHECK-SAME: "-o" {{".*-gfx906\+xnack.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx906" "-mxnack" "-msram-ecc" + +// CHECK: [[OPT]] {{".*-gfx906\+xnack\+sram-ecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx906" +// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx906\+xnack\+sram-ecc.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XE_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj" +// CHECK-SAME: {{.*}} "-mcpu=gfx906" +// CHECK-SAME: "-o" {{".*-gfx906\+xnack\+sram-ecc.*o"}} + +// CHECK: {{".*clang-offload-bundler"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx906+xnack,hip-amdgcn-amd-amdhsa-gfx906+xnack+sram-ecc"