Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -70,6 +70,16 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime; provide its path via '--rocm-path', or pass " "'-nogpuinc' to build without HIP runtime">; +def err_drv_no_hip_stdpar_lib : Error< + "cannot find HIP Standard Parallelism Acceleration library; provide it via " + "'--stdpar-path'">; +def err_drv_no_hip_stdpar_thrust_lib : Error< + "cannot find rocThrust, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via " + "'--stdpar-thrust-path'">; +def err_drv_no_hip_stdpar_prim_lib : Error< + "cannot find rocPrim, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via '--stdpar-prim-path'">; def err_drv_no_hipspv_device_lib : Error< "cannot find HIP device library%select{| for %1}0; provide its path via " Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -278,6 +278,8 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used") LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") +LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)") +LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)") LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1059,6 +1059,21 @@ HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +// TODO: use MarshallingInfo here +def stdpar_path_EQ : Joined<["--"], "stdpar-path=">, Group, + HelpText< + "HIP Standard Parallel Algorithm Acceleration library path, used for " + "finding and implicitly including the library header.">; +def stdpar_thrust_path_EQ : Joined<["--"], "stdpar-thrust-path=">, + Group, + HelpText< + "rocThrust path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocThrust library.">; +def stdpar_prim_path_EQ : Joined<["--"], "stdpar-prim-path=">, + Group, + HelpText< + "rocPrim path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocPrim library.">; def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, HelpText<"Tool used for detecting AMD GPU arch in the system.">; def nvptx_arch_tool_EQ : Joined<["--"], "nvptx-arch-tool=">, Group, @@ -4624,6 +4639,18 @@ MetaVarName<"">; def y : Joined<["-"], "y">; +// TODO: we may want to alias this to -x hip +def stdpar : Flag<["-", "--"], "stdpar">, Flags<[CoreOption, CC1Option]>, + Group, + HelpText<"Enable HIP acceleration for standard parallel algorithms">, + MarshallingInfoFlag>; +def stdpar_interpose_alloc : Flag<["-", "--"], "stdpar-interpose-alloc">, + Flags<[CoreOption, CC1Option]>, + Group, + HelpText<"Replace all memory allocation / deallocation calls with " + "hipManagedMalloc / hipFree equivalents.">, + MarshallingInfoFlag>; + defm integrated_as : BoolFOption<"integrated-as", CodeGenOpts<"DisableIntegratedAS">, DefaultFalse, NegFlag, PosFlag, Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -791,7 +791,8 @@ [](std::pair &I) { return types::isHIP(I.first); }) || - C.getInputArgs().hasArg(options::OPT_hip_link); + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_stdpar); if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; @@ -2742,6 +2743,10 @@ } } + if ((Ty == types::TY_C || Ty == types::TY_CXX) && + Args.hasArgNoClaim(options::OPT_stdpar)) + Ty = types::TY_HIP; + if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true)) Inputs.push_back(std::make_pair(Ty, A)); @@ -3952,6 +3957,11 @@ phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg); if (FinalPhase == phases::Link) { + if (Args.hasArgNoClaim(options::OPT_stdpar)) { + Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link)); + Args.AddFlagArg(nullptr, + getOpts().getOption(options::OPT_frtlib_add_rpath)); + } // Emitting LLVM while linking disabled except in HIPAMD Toolchain if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link)) Diag(clang::diag::err_drv_emit_llvm_link); Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -329,6 +329,19 @@ RocmDeviceLibPathArg = Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); + HIPStdParPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_path_EQ); + HasHIPStdParLibrary = !HIPStdParPathArg.empty() && + D.getVFS().exists(HIPStdParPathArg + "/stdpar_lib.hpp"); + HIPRocThrustPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_thrust_path_EQ); + HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && + D.getVFS().exists(HIPRocThrustPathArg + "/thrust"); + HIPRocPrimPathArg = + Args.getLastArgValue(clang::driver::options::OPT_stdpar_prim_path_EQ); + HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && + D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); + if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { HIPVersionArg = A->getValue(); unsigned Major = ~0U; @@ -507,6 +520,7 @@ ArgStringList &CC1Args) const { bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && !DriverArgs.hasArg(options::OPT_nohipwrapperinc); + bool HasStdPar = DriverArgs.hasArg(options::OPT_stdpar); if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { // HIP header includes standard library wrapper headers under clang @@ -529,8 +543,45 @@ CC1Args.push_back(DriverArgs.MakeArgString(P)); } - if (DriverArgs.hasArg(options::OPT_nogpuinc)) + const auto HandleStdPar = [=, &DriverArgs, &CC1Args]() { + if (!hasHIPStdParLibrary()) { + D.Diag(diag::err_drv_no_hip_stdpar_lib); + return; + } + if (!HasRocThrustLibrary && + !D.getVFS().exists(getIncludePath() + "/thrust")) { + D.Diag(diag::err_drv_no_hip_stdpar_thrust_lib); + return; + } + if (!HasRocPrimLibrary && + !D.getVFS().exists(getIncludePath() + "/rocprim")) { + D.Diag(diag::err_drv_no_hip_stdpar_prim_lib); + return; + } + + const char *ThrustPath; + if (HasRocThrustLibrary) + ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg); + else + ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust"); + + const char *PrimPath; + if (HasRocPrimLibrary) + PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg); + else + PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim"); + + CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath, + "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg), + "-include", "stdpar_lib.hpp"}); + }; + + if (DriverArgs.hasArg(options::OPT_nogpuinc)) { + if (HasStdPar) + HandleStdPar(); + return; + } if (!hasHIPRuntime()) { D.Diag(diag::err_drv_no_hip_runtime); @@ -541,6 +592,8 @@ CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); if (UsesRuntimeWrapper) CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); + if (HasStdPar) + HandleStdPar(); } void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6527,6 +6527,12 @@ if (Args.hasFlag(options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init, false)) CmdArgs.push_back("-fgpu-allow-device-init"); + if (Args.hasArg(options::OPT_stdpar)) { + CmdArgs.push_back("-stdpar"); + + if (Args.hasArg(options::OPT_stdpar_interpose_alloc)) + CmdArgs.push_back("-stdpar-interpose-alloc"); + } Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } Index: clang/lib/Driver/ToolChains/ROCm.h =================================================================== --- clang/lib/Driver/ToolChains/ROCm.h +++ clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ const Driver &D; bool HasHIPRuntime = false; bool HasDeviceLibrary = false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // Default version if not detected or specified. const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --stdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --stdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --stdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. void print(raw_ostream &OS) const; Index: clang/test/Driver/stdpar.c =================================================================== --- /dev/null +++ clang/test/Driver/stdpar.c @@ -0,0 +1,18 @@ +// RUN: %clang -### -stdpar --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-MISSING-LIB %s +// STDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--stdpar-path' + +// RUN: %clang -### --stdpar --stdpar-path=%S/Inputs/stdpar \ +// RUN: --stdpar-thrust-path=%S/Inputs/stdpar/thrust \ +// RUN: --stdpar-prim-path=%S/Inputs/stdpar/prim --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=STDPAR-COMPILE %s +// STDPAR-COMPILE: "-x" "hip" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar/thrust}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar/prim}}" +// STDPAR-COMPILE: "-idirafter" "{{.*/Inputs/stdpar}}" +// STDPAR-COMPILE: "-include" "stdpar_lib.hpp" + +// RUN: touch %t.o +// RUN: %clang -### -stdpar %t.o 2>&1 | FileCheck --check-prefix=STDPAR-LINK %s +// STDPAR-LINK: "-rpath" +// STDPAR-LINK: "-l{{.*hip.*}}" Index: llvm/include/llvm/CodeGen/AccelTable.h =================================================================== --- llvm/include/llvm/CodeGen/AccelTable.h +++ llvm/include/llvm/CodeGen/AccelTable.h @@ -14,7 +14,6 @@ #define LLVM_CODEGEN_ACCELTABLE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -143,6 +142,9 @@ std::vector Values; MCSymbol *Sym; + HashData(DwarfStringPoolEntryRef Name, HashFn *Hash) + : Name(Name), HashValue(Hash(Name.getString())) {} + #ifndef NDEBUG void print(raw_ostream &OS) const; void dump() const { print(dbgs()); } @@ -155,7 +157,7 @@ /// Allocator for HashData and Values. BumpPtrAllocator Allocator; - using StringEntries = MapVector; + using StringEntries = StringMap; StringEntries Entries; HashFn *Hash; @@ -167,7 +169,7 @@ void computeBucketCount(); - AccelTableBase(HashFn *Hash) : Hash(Hash) {} + AccelTableBase(HashFn *Hash) : Entries(Allocator), Hash(Hash) {} public: void finalize(AsmPrinter *Asm, StringRef Prefix); @@ -205,13 +207,10 @@ assert(Buckets.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. - auto &It = Entries[Name.getString()]; - if (It.Values.empty()) { - It.Name = Name; - It.HashValue = Hash(Name.getString()); - } - It.Values.push_back(new (Allocator) - AccelTableDataT(std::forward(Args)...)); + auto Iter = Entries.try_emplace(Name.getString(), Name, Hash).first; + assert(Iter->second.Name == Name); + Iter->second.Values.push_back( + new (Allocator) AccelTableDataT(std::forward(Args)...)); } /// A base class for different implementations of Data classes for Apple Index: llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp =================================================================== --- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -664,9 +664,9 @@ void AccelTableBase::print(raw_ostream &OS) const { // Print Content. OS << "Entries: \n"; - for (const auto &[Name, Data] : Entries) { - OS << "Name: " << Name << "\n"; - for (auto *V : Data.Values) + for (const auto &Entry : Entries) { + OS << "Name: " << Entry.first() << "\n"; + for (auto *V : Entry.second.Values) V->print(OS); } Index: llvm/test/DebugInfo/Generic/accel-table-hash-collisions.ll =================================================================== --- llvm/test/DebugInfo/Generic/accel-table-hash-collisions.ll +++ llvm/test/DebugInfo/Generic/accel-table-hash-collisions.ll @@ -32,8 +32,8 @@ ; CHECK: String: 0x{{[0-9a-f]*}} "is" ; CHECK: Hash 0xa4b42a1e -; CHECK: String: 0x{{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE" ; CHECK: String: 0x{{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv" +; CHECK: String: 0x{{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE" ; CHECK: Hash 0xeee7c0b2 ; CHECK: String: 0x{{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE" Index: llvm/test/DebugInfo/Generic/apple-names-hash-collisions.ll =================================================================== --- llvm/test/DebugInfo/Generic/apple-names-hash-collisions.ll +++ llvm/test/DebugInfo/Generic/apple-names-hash-collisions.ll @@ -15,14 +15,14 @@ ; FOUND_VARS: DW_AT_name ("bb") ; FOUND_VARS: DW_AT_name ("cA") -; ALL_ENTRIES: Apple accelerator entries with name = "bb": -; ALL_ENTRIES: DW_AT_name ("bb") ; ALL_ENTRIES: Apple accelerator entries with name = "cA": ; ALL_ENTRIES: DW_AT_name ("cA") ; ALL_ENTRIES: Apple accelerator entries with name = "some_other_hash": ; ALL_ENTRIES: DW_AT_name ("some_other_hash") ; ALL_ENTRIES: Apple accelerator entries with name = "int": ; ALL_ENTRIES: DW_AT_name ("int") +; ALL_ENTRIES: Apple accelerator entries with name = "bb": +; ALL_ENTRIES: DW_AT_name ("bb") @bb = global i32 200, align 4, !dbg !0 @cA = global i32 10, align 4, !dbg !5 Index: llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll =================================================================== --- llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll +++ llvm/test/DebugInfo/Generic/debug-names-hash-collisions.ll @@ -29,21 +29,21 @@ ; Check that all the names are present in the output ; CHECK: Bucket 0 ; CHECK: Hash: 0xF8CF70D -; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockaSERKS0_" -; CHECK: Hash: 0xF8CF70D ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockC1ERKS0_" -; CHECK: Hash: 0x135A482C -; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErroraSERKS0_" +; CHECK: Hash: 0xF8CF70D +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBBlockaSERKS0_" ; CHECK: Hash: 0x135A482C ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErrorC1ERKS0_" +; CHECK: Hash: 0x135A482C +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZN4lldb7SBErroraSERKS0_" ; CHECK-NOT: String: ; CHECK: Bucket 1 ; CHECK-NEXT: EMPTY ; CHECK: Bucket 2 ; CHECK: Hash: 0x2841B989 -; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11numCommutes" -; CHECK: Hash: 0x2841B989 ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11NumCommutes" +; CHECK: Hash: 0x2841B989 +; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL11numCommutes" ; CHECK: Hash: 0x3E190F5F ; CHECK-NEXT:String: 0x{{[0-9a-f]*}} "_ZL9NumRemats" ; CHECK: Hash: 0x3E190F5F Index: llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp =================================================================== --- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm-dwarfdump.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringSet.h" @@ -464,7 +463,7 @@ static void findAllApple( DWARFContext &DICtx, raw_ostream &OS, std::function GetNameForDWARFReg) { - MapVector> NameToDies; + StringMap> NameToDies; auto PushDIEs = [&](const AppleAcceleratorTable &Accel) { for (const auto &Entry : Accel.entries()) { Index: llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -238,7 +238,6 @@ "sha512intrin.h", "shaintrin.h", "sifive_vector.h", - "sm3intrin.h", "smmintrin.h", "stdalign.h", "stdarg.h",