Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -147,7 +147,7 @@ RocmInstallationDetector::RocmInstallationDetector( const Driver &D, const llvm::Triple &HostTriple, - const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) + const llvm::opt::ArgList &Args, bool DetectHIPRuntime) : D(D) { RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); RocmDeviceLibPathArg = @@ -184,11 +184,9 @@ if (DetectHIPRuntime) detectHIPRuntime(); - if (DetectDeviceLib) - detectDeviceLibrary(); } -void RocmInstallationDetector::detectDeviceLibrary() { +void RocmInstallationDetector::detectDeviceLibrary(llvm::Triple Triple) { assert(LibDevicePath.empty()); if (!RocmDeviceLibPathArg.empty()) @@ -209,6 +207,23 @@ return; } + { + // Try the resource directory. + // Look in amdhsa/amdgcn subdirectory. + SmallString<0> ResourceSubDir(D.ResourceDir); + + // XXX - Should we keep bitcode subdirectory or re-add .amdgcn to the + // suffix? The compiler-rt libraries use libclang_rt.{name}-{i386|x86_64}.a + // in the same subdirectory. + llvm::sys::path::append(ResourceSubDir, "lib", + llvm::Triple::getOSTypeName(Triple.getOS()), + llvm::Triple::getArchTypeName(Triple.getArch())); + scanLibDevicePath(ResourceSubDir); + HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); + if (HasDeviceLibrary) + return; + } + // The install path situation in old versions of ROCm is a real mess, and // use a different install layout. 
Multiple copies of the device libraries // exist for each frontend project, and differ depending on which build @@ -490,7 +505,7 @@ ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : AMDGPUToolChain(D, Triple, Args) { - RocmInstallation.detectDeviceLibrary(); + RocmInstallation.detectDeviceLibrary(Triple); } void AMDGPUToolChain::addClangTargetOptions( Index: clang/lib/Driver/ToolChains/ROCm.h =================================================================== --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -114,8 +114,7 @@ public: RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args, - bool DetectHIPRuntime = true, - bool DetectDeviceLib = false); + bool DetectHIPRuntime = true); /// Add arguments needed to link default bitcode libraries. void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs, @@ -205,7 +204,7 @@ void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; - void detectDeviceLibrary(); + void detectDeviceLibrary(llvm::Triple); void detectHIPRuntime(); /// Get the values for --rocm-device-lib-path arguments Index: clang/test/Driver/hip-device-libs-resource-dir.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-device-libs-resource-dir.hip @@ -0,0 +1,120 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// Test if oclc_daz_opt_on or if oclc_daz_opt_off is linked depending on +// expected denormal mode. + +// Test subtarget with flushing on by default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// Test subtarget with flushing off by default. 
+// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD + + +// Test explicit flag, opposite of target default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fcuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// Test explicit flag, opposite of target default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fno-cuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD + + +// Test explicit flag, same as target default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fno-cuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD + + +// Test explicit flag, same as target default. 
+// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fcuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// Test last flag wins, not flushing +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// Test --hip-device-lib-path flag +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD + + +// Test environment variable HIP_DEVICE_LIB_PATH +// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode \ +// RUN: %clang 
-### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL + +// ALL: {{"[^"]*clang[^"]*"}} +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ockl.bc" + +// FLUSHD-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_on.bc" +// NOFLUSHD-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" + +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" +// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc"