Driver support for HIP Standard Parallelism ("hipstdpar").

What the hunks below do:
  * DiagnosticDriverKinds.td: adds three errors, for a missing hipstdpar
    library, missing rocThrust and missing rocPrim.
  * LangOptions.def: adds the HIPStdPar and HIPStdParInterposeAlloc options.
  * Options.td: adds --hipstdpar, --hipstdpar-interpose-alloc and the
    --hipstdpar-path= / --hipstdpar-thrust-path= / --hipstdpar-prim-path=
    options.
  * Driver.cpp: --hipstdpar makes the compilation count as HIP, retypes
    TY_C / TY_CXX inputs as TY_HIP, and, when the final phase is Link,
    appends the OPT_hip_link and OPT_frtlib_add_rpath flags.
  * AMDGPU.cpp, ROCm.h: record the three paths, probe them through the
    driver VFS, and append -idirafter / -include arguments; this is also
    done on the -nogpuinc early-return path.
  * Clang.cpp: adds both flags to CmdArgs via AddLastArg.
  * HIPAMD.cpp: adds -plugin-opt=-amdgpu-enable-hipstdpar to the lld
    arguments and -mllvm -amdgpu-enable-hipstdpar to the cc1 arguments.
  * test/Driver/hipstdpar.c: new driver test.

NOTE(review): this patch was recovered from a copy in which line breaks had
been collapsed and "<...>" operands stripped.
  * Line breaks, including blank context lines and blank added lines, were
    re-derived from the hunk headers; every hunk matches its line counts.
  * Indentation is conventional and unverified; apply with
    whitespace-insensitive matching.
  * Restored operands (confirm against the tree): MarshallingInfoFlag<LangOpts<...>>
    from the LANGOPT names added here, Alias<rocm_device_lib_path_EQ>, the
    std::pair<...> lambda parameter in Driver.cpp, and
    std::vector<std::string> in ROCm.h.
  * The operand of every bare "Group" in Options.td could NOT be determined
    and is left exactly as found; fill these in before applying.

Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -70,6 +70,16 @@
 def err_drv_no_hip_runtime : Error<
   "cannot find HIP runtime; provide its path via '--rocm-path', or pass "
   "'-nogpuinc' to build without HIP runtime">;
+def err_drv_no_hipstdpar_lib : Error<
+  "cannot find HIP Standard Parallelism Acceleration library; provide it via "
+  "'--hipstdpar-path'">;
+def err_drv_no_hipstdpar_thrust_lib : Error<
+  "cannot find rocThrust, which is required by the HIP Standard Parallelism "
+  "Acceleration library; provide it via "
+  "'--hipstdpar-thrust-path'">;
+def err_drv_no_hipstdpar_prim_lib : Error<
+  "cannot find rocPrim, which is required by the HIP Standard Parallelism "
+  "Acceleration library; provide it via '--hipstdpar-prim-path'">;
 
 def err_drv_no_hipspv_device_lib : Error<
   "cannot find HIP device library%select{| for %1}0; provide its path via "
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -280,6 +280,8 @@
 
 LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
 LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
+LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)")
+LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1258,6 +1258,32 @@
   HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
+def hipstdpar : Flag<["--"], "hipstdpar">,
+  Visibility<[ClangOption, CC1Option]>,
+  Group,
+  HelpText<"Enable HIP acceleration for standard parallel algorithms">,
+  MarshallingInfoFlag<LangOpts<"HIPStdPar">>;
+def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">,
+  Visibility<[ClangOption, CC1Option]>,
+  Group,
+  HelpText<"Replace all memory allocation / deallocation calls with "
+           "hipManagedMalloc / hipFree equivalents">,
+  MarshallingInfoFlag<LangOpts<"HIPStdParInterposeAlloc">>;
+// TODO: use MarshallingInfo here
+def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group,
+  HelpText<
+    "HIP Standard Parallel Algorithm Acceleration library path, used for "
+    "finding and implicitly including the library header">;
+def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">,
+  Group,
+  HelpText<
+    "rocThrust path, required by the HIP Standard Parallel Algorithm "
+    "Acceleration library, used to implicitly include the rocThrust library">;
+def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">,
+  Group,
+  HelpText<
+    "rocPrim path, required by the HIP Standard Parallel Algorithm "
+    "Acceleration library, used to implicitly include the rocPrim library">;
 def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group,
   HelpText<"ROCm device library path. Alternative to rocm-path.">;
 def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -767,7 +767,8 @@
                    [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
                      return types::isHIP(I.first);
                    }) ||
-      C.getInputArgs().hasArg(options::OPT_hip_link);
+      C.getInputArgs().hasArg(options::OPT_hip_link) ||
+      C.getInputArgs().hasArg(options::OPT_hipstdpar);
   if (IsCuda && IsHIP) {
     Diag(clang::diag::err_drv_mix_cuda_hip);
     return;
@@ -2705,6 +2706,10 @@
           }
         }
 
+        if ((Ty == types::TY_C || Ty == types::TY_CXX) &&
+            Args.hasArgNoClaim(options::OPT_hipstdpar))
+          Ty = types::TY_HIP;
+
         if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
           Inputs.push_back(std::make_pair(Ty, A));
 
@@ -3915,6 +3920,11 @@
   phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
 
   if (FinalPhase == phases::Link) {
+    if (Args.hasArgNoClaim(options::OPT_hipstdpar)) {
+      Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link));
+      Args.AddFlagArg(nullptr,
+                      getOpts().getOption(options::OPT_frtlib_add_rpath));
+    }
     // Emitting LLVM while linking disabled except in HIPAMD Toolchain
     if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
       Diag(clang::diag::err_drv_emit_llvm_link);
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -329,6 +329,20 @@
   RocmDeviceLibPathArg =
       Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
   HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
+  HIPStdParPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ);
+  HasHIPStdParLibrary =
+    !HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg +
+                                                   "/hipstdpar_lib.hpp");
+  HIPRocThrustPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ);
+  HasRocThrustLibrary = !HIPRocThrustPathArg.empty() &&
+                        D.getVFS().exists(HIPRocThrustPathArg + "/thrust");
+  HIPRocPrimPathArg =
+    Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ);
+  HasRocPrimLibrary = !HIPRocPrimPathArg.empty() &&
+                      D.getVFS().exists(HIPRocPrimPathArg + "/rocprim");
+
   if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
     HIPVersionArg = A->getValue();
     unsigned Major = ~0U;
@@ -507,6 +521,7 @@
                                                  ArgStringList &CC1Args) const {
   bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
                             !DriverArgs.hasArg(options::OPT_nohipwrapperinc);
+  bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar);
 
   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
     // HIP header includes standard library wrapper headers under clang
@@ -529,8 +544,45 @@
     CC1Args.push_back(DriverArgs.MakeArgString(P));
   }
 
-  if (DriverArgs.hasArg(options::OPT_nogpuinc))
+  const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
+    if (!hasHIPStdParLibrary()) {
+      D.Diag(diag::err_drv_no_hipstdpar_lib);
+      return;
+    }
+    if (!HasRocThrustLibrary &&
+        !D.getVFS().exists(getIncludePath() + "/thrust")) {
+      D.Diag(diag::err_drv_no_hipstdpar_thrust_lib);
+      return;
+    }
+    if (!HasRocPrimLibrary &&
+        !D.getVFS().exists(getIncludePath() + "/rocprim")) {
+      D.Diag(diag::err_drv_no_hipstdpar_prim_lib);
+      return;
+    }
+
+    const char *ThrustPath;
+    if (HasRocThrustLibrary)
+      ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg);
+    else
+      ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust");
+
+    const char *PrimPath;
+    if (HasRocPrimLibrary)
+      PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg);
+    else
+      PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim");
+
+    CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath,
+                    "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg),
+                    "-include", "hipstdpar_lib.hpp"});
+  };
+
+  if (DriverArgs.hasArg(options::OPT_nogpuinc)) {
+    if (HasHipStdPar)
+      HandleHipStdPar();
+
     return;
+  }
 
   if (!hasHIPRuntime()) {
     D.Diag(diag::err_drv_no_hip_runtime);
@@ -541,6 +593,8 @@
   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
   if (UsesRuntimeWrapper)
     CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
+  if (HasHipStdPar)
+    HandleHipStdPar();
 }
 
 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6580,6 +6580,8 @@
       CmdArgs.push_back("-fhip-new-launch-api");
     Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init,
                       options::OPT_fno_gpu_allow_device_init);
+    Args.AddLastArg(CmdArgs, options::OPT_hipstdpar);
+    Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc);
     Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
                       options::OPT_fno_hip_kernel_arg_name);
   }
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -113,6 +113,8 @@
                         "--no-undefined",
                         "-shared",
                         "-plugin-opt=-amdgpu-internalize-symbols"};
+  if (Args.hasArg(options::OPT_hipstdpar))
+    LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");
 
   auto &TC = getToolChain();
   auto &D = TC.getDriver();
@@ -242,6 +244,8 @@
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                           false))
     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
+  if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
+    CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
 
   StringRef MaxThreadsPerBlock =
       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
Index: clang/lib/Driver/ToolChains/ROCm.h
===================================================================
--- clang/lib/Driver/ToolChains/ROCm.h
+++ clang/lib/Driver/ToolChains/ROCm.h
@@ -77,6 +77,9 @@
   const Driver &D;
   bool HasHIPRuntime = false;
   bool HasDeviceLibrary = false;
+  bool HasHIPStdParLibrary = false;
+  bool HasRocThrustLibrary = false;
+  bool HasRocPrimLibrary = false;
 
   // Default version if not detected or specified.
   const unsigned DefaultVersionMajor = 3;
@@ -96,6 +99,13 @@
   std::vector<std::string> RocmDeviceLibPathArg;
   // HIP runtime path specified by --hip-path.
   StringRef HIPPathArg;
+  // HIP Standard Parallel Algorithm acceleration library specified by
+  // --hipstdpar-path
+  StringRef HIPStdParPathArg;
+  // rocThrust algorithm library specified by --hipstdpar-thrust-path
+  StringRef HIPRocThrustPathArg;
+  // rocPrim algorithm library specified by --hipstdpar-prim-path
+  StringRef HIPRocPrimPathArg;
   // HIP version specified by --hip-version.
   StringRef HIPVersionArg;
   // Wheter -nogpulib is specified.
@@ -180,6 +190,9 @@
   /// Check whether we detected a valid ROCm device library.
   bool hasDeviceLibrary() const { return HasDeviceLibrary; }
 
+  /// Check whether we detected a valid HIP STDPAR Acceleration library.
+  bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; }
+
   /// Print information about the detected ROCm installation.
   void print(raw_ostream &OS) const;
 
Index: clang/test/Driver/hipstdpar.c
===================================================================
--- /dev/null
+++ clang/test/Driver/hipstdpar.c
@@ -0,0 +1,18 @@
+// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN:   FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s
+// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \
+// RUN:   --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \
+// RUN:   --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \
+// RUN:   -nogpulib -nogpuinc --compile %s 2>&1 | \
+// RUN:   FileCheck --check-prefix=HIPSTDPAR-COMPILE %s
+// RUN: touch %t.o
+// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s
+
+// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path'
+// HIPSTDPAR-COMPILE: "-x" "hip"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}"
+// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}"
+// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp"
+// HIPSTDPAR-LINK: "-rpath"
+// HIPSTDPAR-LINK: "-l{{.*hip.*}}"