Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -672,6 +672,9 @@ def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">; +def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, + HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " + "__cyg_profile_func_enter and __cyg_profile_func_exit">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -321,6 +321,15 @@ RocmInstallation.addCommonBitcodeLibCC1Args( DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); + + // Add instrument lib. + auto InstLib = + DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); + if (llvm::sys::fs::exists(InstLib)) { + CC1Args.push_back("-mlink-builtin-bitcode"); + CC1Args.push_back(DriverArgs.MakeArgString(InstLib)); + } else + getDriver().Diag(diag::err_drv_no_such_file) << InstLib; } } Index: clang/test/Driver/hip-device-libs.hip =================================================================== --- clang/test/Driver/hip-device-libs.hip +++ clang/test/Driver/hip-device-libs.hip @@ -105,6 +105,14 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL +// Test --gpu-instrument-lib +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: --gpu-instrument-lib=%S/Inputs/hip_multiple_inputs/instrument.bc \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST + // ALL: {{"[^"]*clang[^"]*"}} // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}ocml.bc" @@ -118,3 +126,4 @@ // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc" +// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc"