Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -936,6 +936,9 @@ def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " "__cyg_profile_func_enter and __cyg_profile_func_exit">; +def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, + HelpText<"Enable sanitizer for AMDGPU target.">; +def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">; def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>, HelpText<"An ID for compilation unit, which should be the same for the same " "compilation unit but different for different compilation units. " Index: clang/include/clang/Driver/ToolChain.h =================================================================== --- clang/include/clang/Driver/ToolChain.h +++ clang/include/clang/Driver/ToolChain.h @@ -688,6 +688,13 @@ virtual void AddFortranStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// Add arguments to use HIP device libraries. If \p UseMLinkOpt is true, + /// device library arguments are preceded by -mlink-builtin-bitcode option. + /// Otherwise they are directly passed. + virtual void addHIPDeviceLibArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + bool UseMLinkOpt) const; + /// Return sanitizers which are available in this toolchain. /// Return sanitizers which are available in this toolchain. virtual SanitizerMask getSupportedSanitizers() const; Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -2887,12 +2887,15 @@ class HIPActionBuilder final : public CudaActionBuilderBase { /// The linker inputs obtained for each device arch. 
SmallVector DeviceLinkerInputs; + bool GPUSanitize; public: HIPActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) { DefaultCudaArch = CudaArch::GFX803; + GPUSanitize = Args.hasFlag(options::OPT_fgpu_sanitize, + options::OPT_fno_gpu_sanitize, false); } bool canUseBundlerUnbundler() const override { return true; } @@ -2941,17 +2944,33 @@ // a fat binary containing all the code objects for different GPU's. // The fat binary is then an input to the host action. for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - auto BackendAction = C.getDriver().ConstructPhaseAction( - C, Args, phases::Backend, CudaDeviceActions[I], - AssociatedOffloadKind); - auto AssembleAction = C.getDriver().ConstructPhaseAction( - C, Args, phases::Assemble, BackendAction, AssociatedOffloadKind); - // Create a link action to link device IR with device library - // and generate ISA. - ActionList AL; - AL.push_back(AssembleAction); - CudaDeviceActions[I] = - C.MakeAction(AL, types::TY_Image); + if (GPUSanitize) { + // When GPU sanitizer is enabled, since we need to link in the + // sanitizer runtime library after the sanitize pass, we have + // to skip the backend and assemble phases and use lld to link + // the bitcode. + ActionList AL; + AL.push_back(CudaDeviceActions[I]); + // Create a link action to link device IR with device library + // and generate ISA. + CudaDeviceActions[I] = + C.MakeAction(AL, types::TY_Image); + } else { + // When GPU sanitizer is not enabled, we follow the conventional + // compiler phases, including backend and assemble phases. 
+ ActionList AL; + auto BackendAction = C.getDriver().ConstructPhaseAction( + C, Args, phases::Backend, CudaDeviceActions[I], + AssociatedOffloadKind); + auto AssembleAction = C.getDriver().ConstructPhaseAction( + C, Args, phases::Assemble, BackendAction, + AssociatedOffloadKind); + AL.push_back(AssembleAction); + // Create a link action to link device IR with device library + // and generate ISA. + CudaDeviceActions[I] = + C.MakeAction(AL, types::TY_Image); + } // OffloadingActionBuilder propagates device arch until an offload // action. Since the next action for creating fatbin does Index: clang/lib/Driver/SanitizerArgs.cpp =================================================================== --- clang/lib/Driver/SanitizerArgs.cpp +++ clang/lib/Driver/SanitizerArgs.cpp @@ -931,10 +931,15 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const { - // NVPTX/AMDGPU doesn't currently support sanitizers. Bailing out here means + // NVPTX doesn't currently support sanitizers. Bailing out here means // that e.g. -fsanitize=address applies only to host code, which is what we // want for now. - if (TC.getTriple().isNVPTX() || TC.getTriple().isAMDGPU()) + // + // AMDGPU sanitizer support is experimental and controlled by -fgpu-sanitize. + if (TC.getTriple().isNVPTX() || + (TC.getTriple().isAMDGPU() && + !Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, + false))) return; // Translate available CoverageFeatures to corresponding clang-cc1 flags. 
Index: clang/lib/Driver/ToolChain.cpp =================================================================== --- clang/lib/Driver/ToolChain.cpp +++ clang/lib/Driver/ToolChain.cpp @@ -1180,6 +1180,10 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} +void ToolChain::addHIPDeviceLibArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args, + bool UseMLinkOpt) const {} + void ToolChain::AddIAMCUIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -50,6 +50,8 @@ OpenCL = FilePath; } else if (BaseName == "hip") { HIP = FilePath; + } else if (BaseName == "asanrtl") { + AsanRTL = FilePath; } else if (BaseName == "oclc_finite_only_off") { FiniteOnly.Off = FilePath; } else if (BaseName == "oclc_finite_only_on") { @@ -630,35 +632,23 @@ void RocmInstallationDetector::addCommonBitcodeLibCC1Args( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly, - bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const { - static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode"; - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath())); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath())); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ))); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString( - getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath))); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString( - getFiniteOnlyPath(FiniteOnly || FastRelaxedMath))); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back( 
- DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt))); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64))); - - CC1Args.push_back(LinkBitcodeFlag); - CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); + bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt, + bool UseMLinkOpt) const { + + auto AddBCLib = [&](StringRef BCFile) { + if (UseMLinkOpt) + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); + }; + + AddBCLib(getOCMLPath()); + AddBCLib(getOCKLPath()); + AddBCLib(getDenormalsAreZeroPath(DAZ)); + AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); + AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); + AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); + AddBCLib(getWavefrontSize64Path(Wave64)); + AddBCLib(LibDeviceFile); } bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { Index: clang/lib/Driver/ToolChains/HIP.h =================================================================== --- clang/lib/Driver/ToolChains/HIP.h +++ clang/lib/Driver/ToolChains/HIP.h @@ -89,6 +89,9 @@ llvm::opt::ArgStringList &CC1Args) const override; void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void addHIPDeviceLibArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + bool UseMLinkOpt) const override; SanitizerMask getSupportedSanitizers() const override; Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -37,17 +37,23 @@ namespace { const unsigned HIPCodeObjectAlign = 4096; +static void addBCLib(const ArgList &Args, ArgStringList &CmdArgs, + StringRef BitcodeFile, bool UseMLinkOpt = true) { + if (UseMLinkOpt) + CmdArgs.push_back("-mlink-builtin-bitcode"); + 
CmdArgs.push_back(Args.MakeArgString(BitcodeFile)); +} + static void addBCLib(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs, ArgStringList LibraryPaths, - StringRef BCName) { + StringRef BCName, bool UseMLinkOpt = true) { StringRef FullName; for (std::string LibraryPath : LibraryPaths) { SmallString<128> Path(LibraryPath); llvm::sys::path::append(Path, BCName); FullName = Path; if (llvm::sys::fs::exists(FullName)) { - CmdArgs.push_back("-mlink-builtin-bitcode"); - CmdArgs.push_back(Args.MakeArgString(FullName)); + addBCLib(Args, CmdArgs, FullName, UseMLinkOpt); return; } } @@ -105,6 +111,11 @@ LldArgs.append({"-o", Output.getFilename()}); for (auto Input : Inputs) LldArgs.push_back(Input.getFilename()); + + if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, + false)) + TC.addHIPDeviceLibArgs(Args, LldArgs, /*UseMLinkOpt=*/false); + const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Lld, LldArgs, Inputs, Output)); @@ -265,13 +276,8 @@ Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - StringRef GpuArch = getGPUArch(DriverArgs); - assert(!GpuArch.empty() && "Must have an explicit GPU arch."); - (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); - auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); - const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); CC1Args.push_back("-fcuda-is-device"); @@ -301,66 +307,7 @@ CC1Args.push_back("-fapply-global-visibility-to-externs"); } - if (DriverArgs.hasArg(options::OPT_nogpulib)) - return; - ArgStringList LibraryPaths; - - // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. 
- for (auto Path : RocmInstallation.getRocmDeviceLibPathArg()) - LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); - - addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH"); - - // Maintain compatability with --hip-device-lib. - auto BCLibs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); - if (!BCLibs.empty()) { - for (auto Lib : BCLibs) - addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib); - } else { - if (!RocmInstallation.hasDeviceLibrary()) { - getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; - return; - } - - std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); - if (LibDeviceFile.empty()) { - getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; - return; - } - - // If --hip-device-lib is not set, add the default bitcode libraries. - // TODO: There are way too many flags that change this. Do we need to check - // them all? - bool DAZ = DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, - options::OPT_fno_cuda_flush_denormals_to_zero, - getDefaultDenormsAreZeroForTarget(Kind)); - // TODO: Check standard C++ flags? - bool FiniteOnly = false; - bool UnsafeMathOpt = false; - bool FastRelaxedMath = false; - bool CorrectSqrt = true; - bool Wave64 = isWave64(DriverArgs, Kind); - - // Add the HIP specific bitcode library. - CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getHIPPath())); - - // Add the generic set of libraries. - RocmInstallation.addCommonBitcodeLibCC1Args( - DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, - UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); - - // Add instrument lib. 
- auto InstLib = - DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); - if (InstLib.empty()) - return; - if (llvm::sys::fs::exists(InstLib)) { - CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(InstLib)); - } else - getDriver().Diag(diag::err_drv_no_such_file) << InstLib; - } + addHIPDeviceLibArgs(DriverArgs, CC1Args, /*UseMLinkOpt=*/true); } llvm::opt::DerivedArgList * @@ -439,3 +386,87 @@ const ArgList &Args) const { return HostTC.computeMSVCVersion(D, Args); } + +void HIPToolChain::addHIPDeviceLibArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CmdArgs, + bool UseMLinkOpt) const { + if (DriverArgs.hasArg(options::OPT_nogpulib)) + return; + ArgStringList LibraryPaths; + + // Find in --hip-device-lib-path and HIP_DEVICE_LIB_PATH. + for (auto Path : RocmInstallation.getRocmDeviceLibPathArg()) + LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); + + addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH"); + + // Maintain compatibility with --hip-device-lib. + auto BCLibs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); + if (!BCLibs.empty()) { + for (auto Lib : BCLibs) + addBCLib(getDriver(), DriverArgs, CmdArgs, LibraryPaths, Lib, + UseMLinkOpt); + } else { + if (!RocmInstallation.hasDeviceLibrary()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; + return; + } + StringRef GpuArch = getGPUArch(DriverArgs); + assert(!GpuArch.empty() && "Must have an explicit GPU arch."); + (void)GpuArch; + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); + const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); + + std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); + if (LibDeviceFile.empty()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; + return; + } + + // If --hip-device-lib is not set, add the default bitcode libraries. + // TODO: There are way too many flags that change this. 
Do we need to check + // them all? + bool DAZ = DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, + options::OPT_fno_cuda_flush_denormals_to_zero, + getDefaultDenormsAreZeroForTarget(Kind)); + // TODO: Check standard C++ flags? + bool FiniteOnly = false; + bool UnsafeMathOpt = false; + bool FastRelaxedMath = false; + bool CorrectSqrt = true; + bool Wave64 = isWave64(DriverArgs, Kind); + + if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, + options::OPT_fno_gpu_sanitize, false)) { + auto AsanRTL = RocmInstallation.getAsanRTLPath(); + if (AsanRTL.empty()) { + unsigned DiagID = getDriver().getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "AMDGPU address sanitizer runtime library (asanrtl) is not found. " + "Please install ROCm device library which supports address " + "sanitizer"); + getDriver().Diag(DiagID); + return; + } else + addBCLib(DriverArgs, CmdArgs, AsanRTL, UseMLinkOpt); + } + + // Add the HIP specific bitcode library. + addBCLib(DriverArgs, CmdArgs, RocmInstallation.getHIPPath(), UseMLinkOpt); + + // Add the generic set of libraries. + RocmInstallation.addCommonBitcodeLibCC1Args( + DriverArgs, CmdArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, + UnsafeMathOpt, FastRelaxedMath, CorrectSqrt, UseMLinkOpt); + + // Add instrument lib. + auto InstLib = + DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); + if (InstLib.empty()) + return; + if (llvm::sys::fs::exists(InstLib)) + addBCLib(DriverArgs, CmdArgs, InstLib, UseMLinkOpt); + else + getDriver().Diag(diag::err_drv_no_such_file) << InstLib; + } +} Index: clang/lib/Driver/ToolChains/ROCm.h =================================================================== --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -88,6 +88,9 @@ SmallString<0> OpenCL; SmallString<0> HIP; + // Asan runtime library + SmallString<0> AsanRTL; + // Libraries swapped based on compile flags. 
ConditionalLibrary WavefrontSize64; ConditionalLibrary FiniteOnly; @@ -117,7 +120,8 @@ llvm::opt::ArgStringList &CC1Args, StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, - bool FastRelaxedMath, bool CorrectSqrt) const; + bool FastRelaxedMath, bool CorrectSqrt, + bool UseMLinkOpt = true) const; /// Check whether we detected a valid HIP runtime. bool hasHIPRuntime() const { return HasHIPRuntime; } @@ -166,6 +170,9 @@ return HIP; } + /// Returns empty string if Asan runtime library is not available. + StringRef getAsanRTLPath() const { return AsanRTL; } + StringRef getWavefrontSize64Path(bool Enabled) const { return WavefrontSize64.get(Enabled); } Index: clang/test/Driver/Inputs/rocm-invalid/bin/.hipVersion =================================================================== --- /dev/null +++ clang/test/Driver/Inputs/rocm-invalid/bin/.hipVersion @@ -0,0 +1,6 @@ +# Auto-generated by cmake +# NOTE: The trailing whitespace is added on purpose to verify that these +# whitespaces are trimmed before parsing. 
+HIP_VERSION_MAJOR=3 +HIP_VERSION_MINOR=6 +HIP_VERSION_PATCH=20214-a2917cd Index: clang/test/Driver/hip-sanitize-options.hip =================================================================== --- clang/test/Driver/hip-sanitize-options.hip +++ clang/test/Driver/hip-sanitize-options.hip @@ -1,9 +1,40 @@ // REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target -// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx906 \ +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx900 \ // RUN: -fsanitize=address \ -// RUN: -nogpuinc -nogpulib \ +// RUN: -nogpuinc --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx900 \ +// RUN: -fsanitize=address -fno-gpu-sanitize \ +// RUN: -nogpuinc --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx900 \ +// RUN: -fsanitize=address -fgpu-sanitize \ +// RUN: -nogpuinc --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefixes=NORDC %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx900 \ +// RUN: -fsanitize=address -fgpu-sanitize -fgpu-rdc \ +// RUN: -nogpuinc --rocm-path=%S/Inputs/rocm \ +// RUN: %s 2>&1 | FileCheck -check-prefixes=RDC %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu --offload-arch=gfx900 \ +// RUN: -fsanitize=address -fgpu-sanitize \ +// RUN: -nogpuinc --rocm-path=%S/Inputs/rocm-invalid \ +// RUN: %s 2>&1 | FileCheck -check-prefixes=FAIL %s + // CHECK-NOT: {{"[^"]*clang[^"]*".* "-fcuda-is-device".* "-fsanitize=address"}} +// CHECK-NOT: {{"[^"]*lld[^"]*".* ".*hip.bc"}} // CHECK: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}} + +// NORDC: {{"[^"]*clang[^"]*".* "-fcuda-is-device".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.bc]]" +// NORDC: {{"[^"]*lld[^"]*".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}} +// NORDC: 
{{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}} + +// RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* "-fsanitize=address"}} +// RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* "-fsanitize=address".*}} "-o" "[[OUT:[^"]*.bc]]" +// RDC: {{"[^"]*lld[^"]*".*}} "[[OUT]]" {{".*asanrtl.bc" ".*hip.bc"}} + +// FAIL: AMDGPU address sanitizer runtime library (asanrtl) is not found. Please install ROCm device library which supports address sanitizer