// Patch summary (preamble text placed ahead of the first "Index:" header).
//
// Unified diff adding driver-level support for a "-stdpar" / "--stdpar" mode
// (HIP acceleration for standard parallel algorithms) to clang. Per file:
//
//   * DiagnosticDriverKinds.td: adds err_drv_no_hip_stdpar_lib,
//     err_drv_no_hip_stdpar_thrust_lib and err_drv_no_hip_stdpar_prim_lib.
//   * LangOptions.def: adds the HIPStdPar and HIPStdParInterposeAlloc
//     language options, both 1 bit wide and defaulting to 0.
//   * Options.td: adds --stdpar-path=, --stdpar-thrust-path=,
//     --stdpar-prim-path=, and the -stdpar / -stdpar-interpose-alloc flags
//     (the two flags are CoreOption + CC1Option).
//   * Driver.cpp: OPT_stdpar is added to the HIP-detection expression; C and
//     C++ inputs are retyped to types::TY_HIP when OPT_stdpar is present; when
//     the final phase is Link, OPT_hip_link and OPT_frtlib_add_rpath are
//     appended to Args.
//   * AMDGPU.cpp: the three path arguments are recorded, and a library counts
//     as present only if its path is non-empty and "<path>/stdpar_lib.hpp",
//     "<path>/thrust" or "<path>/rocprim" exists in the driver VFS. The
//     HandleStdPar lambda emits one of the new diagnostics and returns early,
//     or appends three "-idirafter" entries plus "-include stdpar_lib.hpp" to
//     CC1Args. It is invoked on the OPT_nogpuinc early-return path as well as
//     after the HIP runtime wrapper include.
//   * Clang.cpp: forwards "-stdpar" to cc1, and "-stdpar-interpose-alloc" only
//     when "-stdpar" is also given.
//   * HIPAMD.cpp: adds "-plugin-opt=-amdgpu-enable-stdpar" to the lld
//     arguments and "-mllvm -amdgpu-enable-stdpar" to the CC1 arguments.
//   * ROCm.h: adds the Has*Library flags, the three StringRef path members and
//     the hasHIPStdParLibrary() accessor.
//   * clang/test/Driver/stdpar.c: new test checking the missing-library
//     diagnostic, the compile-time cc1 arguments and the link-time arguments.
//
// NOTE(review): this copy appears to have lost its original line breaks and
// indentation, and some "<...>" arguments look stripped (e.g. "Group,",
// "MarshallingInfoFlag>;", "std::pair &I") -- presumably extraction damage.
// Recover the original patch text before trying to apply it.
// NOTE(review): the -stdpar-interpose-alloc help text names
// "hipManagedMalloc"; the HIP runtime entry point is presumably
// hipMallocManaged -- confirm against the HIP API reference.
// NOTE(review): the diagnostics say "Standard Parallelism Acceleration" while
// the option help and LANGOPT text say "Standard Parallel Algorithm
// Acceleration", and "(experimental)" / "(Experimental)" differ in case --
// consider unifying.
// NOTE(review): with an explicit --stdpar-thrust-path / --stdpar-prim-path the
// directory passed to -idirafter is the one that CONTAINS "thrust"/"rocprim",
// but the fallback passes getIncludePath() + "/thrust" (or "/rocprim")
// itself -- verify that this asymmetry is intended.
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -70,6 +70,16 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime; provide its path via '--rocm-path', or pass " "'-nogpuinc' to build without HIP runtime">; +def err_drv_no_hip_stdpar_lib : Error< + "cannot find HIP Standard Parallelism Acceleration library; provide it via " + "'--stdpar-path'">; +def err_drv_no_hip_stdpar_thrust_lib : Error< + "cannot find rocThrust, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via " + "'--stdpar-thrust-path'">; +def err_drv_no_hip_stdpar_prim_lib : Error< + "cannot find rocPrim, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via '--stdpar-prim-path'">; def err_drv_no_hipspv_device_lib : Error< "cannot find HIP device library%select{| for %1}0; provide its path via " Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -279,6 +279,8 @@ LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)") +LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)") +LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)") LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") Index: clang/include/clang/Driver/Options.td =================================================================== --- 
clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1064,6 +1064,21 @@ HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +// TODO: use MarshallingInfo here +def stdpar_path_EQ : Joined<["--"], "stdpar-path=">, Group, + HelpText< + "HIP Standard Parallel Algorithm Acceleration library path, used for " + "finding and implicitly including the library header.">; +def stdpar_thrust_path_EQ : Joined<["--"], "stdpar-thrust-path=">, + Group, + HelpText< + "rocThrust path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocThrust library.">; +def stdpar_prim_path_EQ : Joined<["--"], "stdpar-prim-path=">, + Group, + HelpText< + "rocPrim path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocPrim library.">; def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, HelpText<"Tool used for detecting AMD GPU arch in the system.">; def nvptx_arch_tool_EQ : Joined<["--"], "nvptx-arch-tool=">, Group, @@ -4634,6 +4649,18 @@ MetaVarName<"">; def y : Joined<["-"], "y">; +// TODO: we may want to alias this to -x hip +def stdpar : Flag<["-", "--"], "stdpar">, Flags<[CoreOption, CC1Option]>, + Group, + HelpText<"Enable HIP acceleration for standard parallel algorithms">, + MarshallingInfoFlag>; +def stdpar_interpose_alloc : Flag<["-", "--"], "stdpar-interpose-alloc">, + Flags<[CoreOption, CC1Option]>, + Group, + HelpText<"Replace all memory allocation / deallocation calls with " + "hipManagedMalloc / hipFree equivalents.">, + MarshallingInfoFlag>; + defm integrated_as : BoolFOption<"integrated-as", CodeGenOpts<"DisableIntegratedAS">, DefaultFalse, NegFlag, PosFlag, Index: clang/lib/Driver/Driver.cpp 
=================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -791,7 +791,8 @@ [](std::pair &I) { return types::isHIP(I.first); }) || - C.getInputArgs().hasArg(options::OPT_hip_link); + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_stdpar); if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; @@ -2743,6 +2744,10 @@ } } + if ((Ty == types::TY_C || Ty == types::TY_CXX) && + Args.hasArgNoClaim(options::OPT_stdpar)) + Ty = types::TY_HIP; + if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true)) Inputs.push_back(std::make_pair(Ty, A)); @@ -3953,6 +3958,11 @@ phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg); if (FinalPhase == phases::Link) { + if (Args.hasArgNoClaim(options::OPT_stdpar)) { + Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link)); + Args.AddFlagArg(nullptr, + getOpts().getOption(options::OPT_frtlib_add_rpath)); + } // Emitting LLVM while linking disabled except in HIPAMD Toolchain if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link)) Diag(clang::diag::err_drv_emit_llvm_link); Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -329,6 +329,19 @@ RocmDeviceLibPathArg = Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); + HIPStdParPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_path_EQ); + HasHIPStdParLibrary = !HIPStdParPathArg.empty() && + D.getVFS().exists(HIPStdParPathArg + "/stdpar_lib.hpp"); + HIPRocThrustPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_thrust_path_EQ); + HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && + D.getVFS().exists(HIPRocThrustPathArg + 
"/thrust"); + HIPRocPrimPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_prim_path_EQ); + HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && + D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); + if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { HIPVersionArg = A->getValue(); unsigned Major = ~0U; @@ -507,6 +520,7 @@ ArgStringList &CC1Args) const { bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && !DriverArgs.hasArg(options::OPT_nohipwrapperinc); + bool HasStdPar = DriverArgs.hasArg(options::OPT_stdpar); if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { // HIP header includes standard library wrapper headers under clang @@ -529,8 +543,45 @@ CC1Args.push_back(DriverArgs.MakeArgString(P)); } - if (DriverArgs.hasArg(options::OPT_nogpuinc)) + const auto HandleStdPar = [=, &DriverArgs, &CC1Args]() { + if (!hasHIPStdParLibrary()) { + D.Diag(diag::err_drv_no_hip_stdpar_lib); + return; + } + if (!HasRocThrustLibrary && + !D.getVFS().exists(getIncludePath() + "/thrust")) { + D.Diag(diag::err_drv_no_hip_stdpar_thrust_lib); + return; + } + if (!HasRocPrimLibrary && + !D.getVFS().exists(getIncludePath() + "/rocprim")) { + D.Diag(diag::err_drv_no_hip_stdpar_prim_lib); + return; + } + + const char *ThrustPath; + if (HasRocThrustLibrary) + ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg); + else + ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust"); + + const char *PrimPath; + if (HasRocPrimLibrary) + PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg); + else + PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim"); + + CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath, + "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg), + "-include", "stdpar_lib.hpp"}); + }; + + if (DriverArgs.hasArg(options::OPT_nogpuinc)) { + if (HasStdPar) + HandleStdPar(); + return; + } if (!hasHIPRuntime()) { D.Diag(diag::err_drv_no_hip_runtime); @@ -541,6 +592,8 @@ 
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); if (UsesRuntimeWrapper) CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); + if (HasStdPar) + HandleStdPar(); } void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6543,6 +6543,12 @@ if (Args.hasFlag(options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init, false)) CmdArgs.push_back("-fgpu-allow-device-init"); + if (Args.hasArg(options::OPT_stdpar)) { + CmdArgs.push_back("-stdpar"); + + if (Args.hasArg(options::OPT_stdpar_interpose_alloc)) + CmdArgs.push_back("-stdpar-interpose-alloc"); + } Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } Index: clang/lib/Driver/ToolChains/HIPAMD.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIPAMD.cpp +++ clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -115,6 +115,8 @@ "--no-undefined", "-shared", "-plugin-opt=-amdgpu-internalize-symbols"}; + if (Args.hasArg(options::OPT_stdpar)) + LldArgs.push_back("-plugin-opt=-amdgpu-enable-stdpar"); auto &TC = getToolChain(); auto &D = TC.getDriver(); @@ -246,6 +248,8 @@ if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); + if (DriverArgs.hasArgNoClaim(options::OPT_stdpar)) + CC1Args.append({"-mllvm", "-amdgpu-enable-stdpar"}); StringRef MaxThreadsPerBlock = DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); Index: clang/lib/Driver/ToolChains/ROCm.h =================================================================== --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ const Driver &D; bool HasHIPRuntime = false; bool HasDeviceLibrary 
= false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // Default version if not detected or specified. const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --stdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --stdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --stdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. 
void print(raw_ostream &OS) const; Index: clang/test/Driver/stdpar.c =================================================================== --- /dev/null +++ clang/test/Driver/stdpar.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -stdpar --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-MISSING-LIB %s +// STDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--stdpar-path' + +// RUN: %clang -### --stdpar --stdpar-path=%S/Inputs/stdpar \ +// RUN: --stdpar-thrust-path=%S/Inputs/stdpar/thrust \ +// RUN: --stdpar-prim-path=%S/Inputs/stdpar/prim --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-COMPILE %s +// STDPAR-COMPILE: "-x" "hip" +// STDPAR-COMPILE: "-idirafter" "{{.*/thrust}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/prim}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar}}" +// STDPAR-COMPILE: "-include" "stdpar_lib.hpp" + +// RUN: touch %t.o +// RUN: %clang -### -stdpar %t.o 2>&1 | FileCheck --check-prefix=STDPAR-LINK %s +// STDPAR-LINK: "-rpath" +// STDPAR-LINK: "-l{{.*hip.*}}"