Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -55,6 +55,14 @@ def err_drv_no_cuda_libdevice : Error< "cannot find libdevice for %0. Provide path to different CUDA installation " "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">; + +def err_drv_no_rocm_installation : Error< + "cannot find ROCm installation. Provide its path via --rocm-path, or pass " + "-nogpulib.">; +def err_drv_no_rocm_device_lib : Error< + "cannot find device library for %0. Provide path to different ROCm installation " + "via --rocm-path, or pass -nogpulib to build without linking default libraries.">; + def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -608,6 +608,8 @@ def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; +def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, + HelpText<"ROCm installation path">; def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group, HelpText<"HIP device library path">; def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -4857,6 +4857,8 @@ TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::AMDHSA: + TC = 
std::make_unique(*this, Target, Args); + break; case llvm::Triple::AMDPAL: case llvm::Triple::Mesa3D: TC = std::make_unique(*this, Target, Args); Index: clang/lib/Driver/ToolChains/AMDGPU.h =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.h +++ clang/lib/Driver/ToolChains/AMDGPU.h @@ -13,12 +13,154 @@ #include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/TargetParser.h" #include namespace clang { namespace driver { + +/// A class to find a viable ROCM installation +/// TODO: Generalize to handle libclc. +class RocmInstallationDetector { +private: + struct ConditionalLibrary { + SmallString<0> On; + SmallString<0> Off; + + bool isValid() const { + return !On.empty() && !Off.empty(); + } + + StringRef get(bool Enabled) const { + assert(isValid()); + return Enabled ? On : Off; + } + }; + + const Driver &D; + bool IsValid = false; + //RocmVersion Version = RocmVersion::UNKNOWN; + SmallString<0> InstallPath; + //SmallString<0> BinPath; + SmallString<0> LibPath; + SmallString<0> LibDevicePath; + SmallString<0> IncludePath; + llvm::StringMap LibDeviceMap; + + // Libraries that are always linked. + SmallString<0> OCML; + SmallString<0> OCKL; + + // Libraries that are always linked depending on the language + SmallString<0> OpenCL; + SmallString<0> HIP; + + // Libraries swapped based on compile flags. 
+ ConditionalLibrary WavefrontSize64; + ConditionalLibrary FiniteOnly; + ConditionalLibrary UnsafeMath; + ConditionalLibrary DenormalsAreZero; + ConditionalLibrary CorrectlyRoundedSqrt; + + // True only when every always-linked library and both the on and off + // variants of every flag-selected library have been found. + bool allGenericLibsValid() const { + return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && !HIP.empty() && + WavefrontSize64.isValid() && FiniteOnly.isValid() && + UnsafeMath.isValid() && DenormalsAreZero.isValid() && + CorrectlyRoundedSqrt.isValid(); + } + + // CUDA architectures for which we have raised an error in + // CheckRocmVersionSupportsArch. + mutable llvm::SmallSet ArchsWithBadVersion; + +public: + RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, + const llvm::opt::ArgList &Args); + + /// Add arguments needed to link default bitcode libraries. + void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + StringRef LibDeviceFile, bool Wave64, + bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, + bool FastRelaxedMath, bool CorrectSqrt) const; + + /// Emit an error if Version does not support the given Arch. + /// + /// If either Version or Arch is unknown, does not emit an error. Emits at + /// most one error per Arch. + void CheckRocmVersionSupportsArch(CudaArch Arch) const; + + /// Check whether we detected a valid Rocm install. + bool isValid() const { return IsValid; } + /// Print information about the detected ROCm installation. + void print(raw_ostream &OS) const; + + /// Get the detected Rocm install's version. + // RocmVersion version() const { return Version; } + + /// Get the detected Rocm installation path. + StringRef getInstallPath() const { return InstallPath; } + + /// Get the detected path to Rocm's bin directory. + // StringRef getBinPath() const { return BinPath; } + + /// Get the detected Rocm Include path. + StringRef getIncludePath() const { return IncludePath; } + + /// Get the detected Rocm library path. 
+ StringRef getLibPath() const { return LibPath; } + + /// Get the detected Rocm device library path. + StringRef getLibDevicePath() const { return LibDevicePath; } + + StringRef getOCMLPath() const { + assert(!OCML.empty()); + return OCML; + } + + StringRef getOCKLPath() const { + assert(!OCKL.empty()); + return OCKL; + } + + StringRef getOpenCLPath() const { + assert(!OpenCL.empty()); + return OpenCL; + } + + StringRef getHIPPath() const { + assert(!HIP.empty()); + return HIP; + } + + StringRef getWavefrontSize64Path(bool Enabled) const { + return WavefrontSize64.get(Enabled); + } + + StringRef getFiniteOnlyPath(bool Enabled) const { + return FiniteOnly.get(Enabled); + } + + StringRef getUnsafeMathPath(bool Enabled) const { + return UnsafeMath.get(Enabled); + } + + StringRef getDenormalsAreZeroPath(bool Enabled) const { + return DenormalsAreZero.get(Enabled); + } + + StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const { + return CorrectlyRoundedSqrt.get(Enabled); + } + + /// Get libdevice file for given architecture + std::string getLibDeviceFile(StringRef Gpu) const { + return LibDeviceMap.lookup(Gpu); + } +}; + namespace tools { namespace amdgpu { @@ -42,11 +184,9 @@ namespace toolchains { class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { - -private: +protected: const std::map OptionsDefault; -protected: Tool *buildLinker() const override; const StringRef getOptionDefault(options::ID OptID) const { auto opt = OptionsDefault.find(OptID); @@ -79,6 +219,19 @@ const llvm::fltSemantics *FPType = nullptr) const override; }; +class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { +private: + RocmInstallationDetector RocmInstallation; + +public: + ROCMToolChain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + void + addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const override; +}; + } // end namespace 
toolchains } // end namespace driver } // end namespace clang Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,7 +12,8 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/TargetParser.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" using namespace clang::driver; using namespace clang::driver::tools; @@ -20,6 +21,162 @@ using namespace clang; using namespace llvm::opt; +RocmInstallationDetector::RocmInstallationDetector( + const Driver &D, const llvm::Triple &HostTriple, + const llvm::opt::ArgList &Args) + : D(D) { + struct Candidate { + std::string Path; + bool StrictChecking; + + Candidate(std::string Path, bool StrictChecking = false) + : Path(Path), StrictChecking(StrictChecking) {} + }; + + SmallVector Candidates; + + if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) { + Candidates.emplace_back( + Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str()); + } else { + // Try to find relative to the compiler binary. + const char *InstallDir = D.getInstalledDir(); + + // Check both a normal Unix prefix position of the clang binary, as well as + // the Windows-esque layout the ROCm packages use with the host architecture + // subdirectory of bin. 
+ + StringRef ParentDir = llvm::sys::path::parent_path(InstallDir); + if (ParentDir == HostTriple.getArchName()) + ParentDir = llvm::sys::path::parent_path(ParentDir); + + if (ParentDir == "bin") { + Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(), + /*StrictChecking=*/true); + } + + Candidates.emplace_back(D.SysRoot + "/opt/rocm"); + } + + bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib); + + for (const auto &Candidate : Candidates) { + InstallPath = Candidate.Path; + if (InstallPath.empty() || !D.getVFS().exists(InstallPath)) + continue; + + // FIXME: The install path situation is a real mess. + + // For a cmake install, these are placed directly in + // ${INSTALL_PREFIX}/lib + + // In the separate OpenCL builds, the bitcode libraries are placed in + // ${OPENCL_ROOT}/lib/x86_64/bitcode/* + + // For the rocm installed packages, these are placed at + // /opt/rocm/opencl/lib/x86_64/bitcode + + // An additional copy is installed, in scattered locations between + // /opt/rocm/hcc/rocdl/oclc + // /opt/rocm/hcc/rocdl/ockl + // /opt/rocm/hcc/rocdl/lib + // + // Yet another complete set is installed to + // /opt/rocm/hcc/rocdl/lib + + // For now just recognize the opencl package layout. + + // BinPath = InstallPath + "/bin"; + // Rebuild both paths from InstallPath on every iteration: path::append() + // extends its first argument, so without the reset the path computed for + // a rejected candidate would be carried into the next one. + IncludePath = InstallPath; + llvm::sys::path::append(IncludePath, "include"); + LibDevicePath = InstallPath; + llvm::sys::path::append(LibDevicePath, "lib"); + + auto &FS = D.getVFS(); + + // We don't need the include path for OpenCL, since clang already ships with + // the default header. 
+ + bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking); + if (CheckLibDevice && !FS.exists(LibDevicePath)) + continue; + + const StringRef Suffix(".amdgcn.bc"); + + // Enumerate through the driver's VFS, the same filesystem object used for + // the exists() check above, instead of going to the real filesystem. + std::error_code EC; + for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC), LE; + !EC && LI != LE; LI.increment(EC)) { + StringRef FilePath = LI->path(); + StringRef FileName = llvm::sys::path::filename(FilePath); + if (!FileName.endswith(Suffix)) + continue; + + StringRef BaseName = FileName.drop_back(Suffix.size()); + + if (BaseName == "ocml") { + OCML = FilePath; + } else if (BaseName == "ockl") { + OCKL = FilePath; + } else if (BaseName == "opencl") { + OpenCL = FilePath; + } else if (BaseName == "hip") { + HIP = FilePath; + } else if (BaseName == "oclc_finite_only_off") { + FiniteOnly.Off = FilePath; + } else if (BaseName == "oclc_finite_only_on") { + FiniteOnly.On = FilePath; + } else if (BaseName == "oclc_daz_opt_on") { + DenormalsAreZero.On = FilePath; + } else if (BaseName == "oclc_daz_opt_off") { + DenormalsAreZero.Off = FilePath; + } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { + CorrectlyRoundedSqrt.On = FilePath; + } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { + CorrectlyRoundedSqrt.Off = FilePath; + } else if (BaseName == "oclc_unsafe_math_on") { + UnsafeMath.On = FilePath; + } else if (BaseName == "oclc_unsafe_math_off") { + UnsafeMath.Off = FilePath; + } else if (BaseName == "oclc_wavefrontsize64_on") { + WavefrontSize64.On = FilePath; + } else if (BaseName == "oclc_wavefrontsize64_off") { + WavefrontSize64.Off = FilePath; + } else { + // Process all bitcode filenames that look like + // oclc_isa_version_XXX.amdgcn.bc + const StringRef DeviceLibPrefix = "oclc_isa_version_"; + if (!BaseName.startswith(DeviceLibPrefix)) + continue; + + StringRef IsaVersionNumber = + BaseName.drop_front(DeviceLibPrefix.size()); + + // Build the map key ("gfx" + version) in owned storage rather than + // keeping a Twine in a local variable. + SmallString<8> GfxName("gfx"); + GfxName += IsaVersionNumber; + LibDeviceMap.insert( + 
std::make_pair(GfxName.str(), FilePath.str())); + } + } + + if (!NoBuiltinLibs) { + // Check that the required non-target libraries are all available. + if (!allGenericLibsValid()) + continue; + + // Check that we have found at least one libdevice that we can link in if + // -nogpulib hasn't been specified. + if (LibDeviceMap.empty()) + continue; + } + + IsValid = true; + break; + } +} + +void RocmInstallationDetector::print(raw_ostream &OS) const { + if (isValid()) + OS << "Found ROCm installation: " << InstallPath << '\n'; +} + void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -142,6 +299,12 @@ llvm::DenormalMode::getIEEE(); } +/// ROCM Toolchain +ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : AMDGPUToolChain(D, Triple, Args), + RocmInstallation(D, Triple, Args) { } + void AMDGPUToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, @@ -155,3 +318,89 @@ CC1Args.push_back("-fapply-global-visibility-to-externs"); } } + +void ROCMToolChain::addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadingKind) const { + AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, + DeviceOffloadingKind); + + if (DriverArgs.hasArg(options::OPT_nogpulib)) + return; + + if (!RocmInstallation.isValid()) { + getDriver().Diag(diag::err_drv_no_rocm_installation); + return; + } + + // Get the device name and canonicalize it + const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); + const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); + std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); + if (LibDeviceFile.empty()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch; + return; + } + + 
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + // Recomputed on every call: a function-local static would be initialised + // once and keep the first architecture's answer for all later invocations. + const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32) != 0; + + // Targets without wave32 support always use wave64; otherwise wave64 is used + // only when -mwavefrontsize64 is the last of the two flags given. + bool Wave64 = !HasWave32 || DriverArgs.hasFlag( + options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); + + // TODO: There are way too many flags that change this. Do we need to check + // them all? + bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || + getDefaultDenormsAreZeroForTarget(Kind); + bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); + + bool UnsafeMathOpt = + DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); + bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); + bool CorrectSqrt = + DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); + + // Add the OpenCL specific bitcode library. + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath())); + + // Add the generic set of libraries. 
+ RocmInstallation.addCommonBitcodeLibCC1Args( + DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, + UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); +} + +void RocmInstallationDetector::addCommonBitcodeLibCC1Args( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly, + bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const { + static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode"; + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath())); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath())); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ))); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString( + getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath))); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString( + getFiniteOnlyPath(FiniteOnly || FastRelaxedMath))); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back( + DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt))); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64))); + + CC1Args.push_back(LinkBitcodeFlag); + CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); +} Index: clang/lib/Driver/ToolChains/HIP.h =================================================================== --- clang/lib/Driver/ToolChains/HIP.h +++ clang/lib/Driver/ToolChains/HIP.h @@ -73,7 +73,7 @@ namespace toolchains { -class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public AMDGPUToolChain { +class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain { public: HIPToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const llvm::opt::ArgList &Args); Index: 
clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -268,7 +268,7 @@ HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : AMDGPUToolChain(D, Triple, Args), HostTC(HostTC) { + : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl +++ clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl @@ -1,5 +1,5 @@ -// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s -// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s +// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s +// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_NONE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}) // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_LOCAL:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 2) Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl +++ clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl @@ -1,5 +1,5 @@ -// RUN: %clang -cl-std=CL2.0 -emit-llvm 
-g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s -// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s +// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s +// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s // CHECK-DAG: ![[FILEVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true) // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR0]], expr: !DIExpression()) Index: clang/test/Driver/amdgpu-visibility.cl =================================================================== --- clang/test/Driver/amdgpu-visibility.cl +++ clang/test/Driver/amdgpu-visibility.cl @@ -2,6 +2,10 @@ // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility=protected %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED %s // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility-ms-compat %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS %s +// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm %s 2>&1 | FileCheck -check-prefix=DEFAULT %s +// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility=protected %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED %s +// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility-ms-compat %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS %s + // DEFAULT-DAG: "-fvisibility" "hidden" // DEFAULT-DAG: "-fapply-global-visibility-to-externs" Index: clang/test/Driver/rocm-detect.cl =================================================================== --- /dev/null +++ clang/test/Driver/rocm-detect.cl @@ -0,0 +1,21 @@ +// REQUIRES: clang-driver +// REQUIRES: amdgpu-registered-target + +// Make sure the appropriate device 
specific library is available. + +// We don't include every target in the test directory, so just pick a valid +// target not included in the test. + +// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s + + +// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 -nogpulib \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s + + +// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide path to different ROCm installation via --rocm-path, or pass -nogpulib to build without linking default libraries. + +// NODEFAULTLIBS-NOT: error: cannot find Index: clang/test/Driver/rocm-device-libs.cl =================================================================== --- /dev/null +++ clang/test/Driver/rocm-device-libs.cl @@ -0,0 +1,163 @@ +// REQUIRES: clang-driver +// REQUIRES: amdgpu-registered-target + +// Test flush-denormals-to-zero enabled uses oclc_daz_opt_on + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s + + + +// Make sure the different denormal default is respected for gfx8 +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s + + + +// Make sure the non-canonical name works +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=fiji \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure 
--check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s + + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx900 \ +// RUN: -cl-denorms-are-zero \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: -cl-denorms-are-zero \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s + + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: -cl-finite-math-only \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s + + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: -cl-fp32-correctly-rounded-divide-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s + + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: -cl-fast-relaxed-math \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s + + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 \ +// RUN: -cl-unsafe-math-optimizations \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE-MATH,GFX803,WAVE64 %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx1010 \ +// RUN: 
--rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE32 %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx1011 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1011,WAVE32 %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx1012 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1012,WAVE32 %s + + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE64 %s + +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE32 %s + +// Ignore -mno-wavefrontsize64 without wave32 support +// RUN: %clang -### -target amdgcn-amd-amdhsa \ +// RUN: -x cl -mcpu=gfx803 -mno-wavefrontsize64 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs \ +// RUN: %S/opencl.cl \ +// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX803,WAVE64 %s + + + +// COMMON: "-triple" "amdgcn-amd-amdhsa" +// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/opencl.amdgcn.bc" +// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ocml.amdgcn.bc" +// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ockl.amdgcn.bc" + +// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_off.amdgcn.bc" +// GFX803-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc" +// GFX700-DEFAULT-SAME: 
"-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc" +// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc" + + +// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc" +// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc" +// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc" + + +// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc" +// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc" +// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc" + + +// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc" +// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc" +// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc" + + +// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc" +// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc" +// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc" + + +// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc" +// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc" +// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc" + +// WAVE64: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_on.amdgcn.bc" +// WAVE32: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_off.amdgcn.bc" + + +// GFX900: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_900.amdgcn.bc" +// GFX803: "-mlink-builtin-bitcode" 
"{{.*}}/lib/oclc_isa_version_803.amdgcn.bc" Index: clang/test/Driver/rocm-not-found.cl =================================================================== --- /dev/null +++ clang/test/Driver/rocm-not-found.cl @@ -0,0 +1,11 @@ +// REQUIRES: clang-driver + +// Check that we raise an error if we're trying to compile OpenCL for amdhsa code but can't +// find a ROCm install, unless -nogpulib was passed. + +// RUN: %clang -### --sysroot=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR +// RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR +// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib. + +// RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck %s --check-prefix OK +// OK-NOT: cannot find ROCm installation. Index: llvm/include/llvm/Support/TargetParser.h =================================================================== --- llvm/include/llvm/Support/TargetParser.h +++ llvm/include/llvm/Support/TargetParser.h @@ -151,7 +151,10 @@ // Common features. FEATURE_FAST_FMA_F32 = 1 << 4, - FEATURE_FAST_DENORMAL_F32 = 1 << 5 + FEATURE_FAST_DENORMAL_F32 = 1 << 5, + + // Wavefront 32 is available. 
+ FEATURE_WAVE32 = 1 << 6 }; StringRef getArchNameAMDGCN(GPUKind AK); Index: llvm/lib/Support/TargetParser.cpp =================================================================== --- llvm/lib/Support/TargetParser.cpp +++ llvm/lib/Support/TargetParser.cpp @@ -99,9 +99,9 @@ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, - {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, + {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32}, }; const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) {