diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -66,7 +66,6 @@ impl/atmi_interop_hsa.cpp impl/data.cpp impl/get_elf_mach_gfx_name.cpp - impl/machine.cpp impl/system.cpp impl/utils.cpp impl/msgpack.cpp diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp @@ -50,7 +50,8 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, const void *hostSrc, size_t size, - hsa_agent_t agent) { + hsa_agent_t agent, + hsa_amd_memory_pool_t MemoryPool) { hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size); // hsa_memory_copy sometimes fails in situations where @@ -61,7 +62,7 @@ } void *tempHostPtr; - hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size); + hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); @@ -79,7 +80,8 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest, const void *deviceSrc, size_t size, - hsa_agent_t agent) { + hsa_agent_t agent, + hsa_amd_memory_pool_t MemoryPool) { hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size); // hsa_memory_copy sometimes fails in situations where @@ -90,7 +92,7 @@ } void *tempHostPtr; - hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size); + hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool); if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size); diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp --- 
a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp @@ -7,11 +7,6 @@ //===----------------------------------------------------------------------===// #include "atmi_interop_hsa.h" #include "internal.h" -#include "machine.h" - -// TODO: need to get rid of this as well - -extern ATLMachine g_atl_machine; hsa_status_t atmi_interop_hsa_get_symbol_info( const std::map &SymbolInfoTable, diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h @@ -55,11 +55,13 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, const void *hostSrc, size_t size, - hsa_agent_t agent); + hsa_agent_t agent, + hsa_amd_memory_pool_t MemoryPool); hsa_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest, const void *deviceSrc, size_t size, - hsa_agent_t agent); + hsa_agent_t agent, + hsa_amd_memory_pool_t MemoryPool); /** @} */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -8,7 +8,6 @@ #include "atmi_runtime.h" #include "hsa_api.h" #include "internal.h" -#include "machine.h" #include "rt.h" #include #include @@ -16,47 +15,17 @@ #include using core::TaskImpl; -extern ATLMachine g_atl_machine; namespace core { -namespace { -ATLProcessor &get_processor_by_mem_place(int DeviceId, - atmi_devtype_t DeviceType) { - switch (DeviceType) { - case ATMI_DEVTYPE_CPU: - return g_atl_machine.processors()[DeviceId]; - case ATMI_DEVTYPE_GPU: - return g_atl_machine.processors()[DeviceId]; - } -} - -hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId, - atmi_devtype_t DeviceType) { - ATLProcessor &proc = get_processor_by_mem_place(DeviceId, 
DeviceType); - return get_memory_pool(proc, 0 /*Memory Type (always zero) */); -} -} // namespace +hsa_status_t Runtime::HostMalloc(void **ptr, size_t size, + hsa_amd_memory_pool_t MemoryPool) { + hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr); + DEBUG_PRINT("Malloced %p\n", *ptr); -hsa_status_t Runtime::DeviceMalloc(void **ptr, size_t size, int DeviceId) { - return Runtime::Malloc(ptr, size, DeviceId, ATMI_DEVTYPE_GPU); -} - -hsa_status_t Runtime::HostMalloc(void **ptr, size_t size) { - hsa_status_t Err = Runtime::Malloc(ptr, size, 0, ATMI_DEVTYPE_CPU); - if (Err == HSA_STATUS_SUCCESS) { - Err = core::allow_access_to_all_gpu_agents(*ptr); + if (err == HSA_STATUS_SUCCESS) { + err = core::allow_access_to_all_gpu_agents(*ptr); } - return Err; -} - -hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId, - atmi_devtype_t DeviceType) { - hsa_amd_memory_pool_t pool = - get_memory_pool_by_mem_place(DeviceId, DeviceType); - hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr); - DEBUG_PRINT("Malloced [%s %d] %p\n", - DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr); return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.h b/openmp/libomptarget/plugins/amdgpu/impl/machine.h deleted file mode 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/machine.h +++ /dev/null @@ -1,79 +0,0 @@ -//===--- amdgpu/impl/machine.h ------------------------------------ C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#ifndef SRC_RUNTIME_INCLUDE_MACHINE_H_ -#define SRC_RUNTIME_INCLUDE_MACHINE_H_ -#include "atmi.h" -#include "hsa_api.h" -#include "internal.h" -#include - -class ATLMemory; - -class ATLProcessor { -public: - explicit ATLProcessor(hsa_agent_t agent, - atmi_devtype_t type = ATMI_DEVTYPE_ALL) - : agent_(agent), type_(type) { - memories_.clear(); - } - void addMemory(const ATLMemory &p); - hsa_agent_t agent() const { return agent_; } - const std::vector &memories() const; - atmi_devtype_t type() const { return type_; } - -protected: - hsa_agent_t agent_; - atmi_devtype_t type_; - std::vector memories_; -}; - -class ATLCPUProcessor : public ATLProcessor { -public: - explicit ATLCPUProcessor(hsa_agent_t agent) - : ATLProcessor(agent, ATMI_DEVTYPE_CPU) {} -}; - -class ATLGPUProcessor : public ATLProcessor { -public: - explicit ATLGPUProcessor(hsa_agent_t agent, - atmi_devtype_t type = ATMI_DEVTYPE_dGPU) - : ATLProcessor(agent, type) {} -}; - -class ATLMemory { -public: - ATLMemory(hsa_amd_memory_pool_t pool, ATLProcessor p, atmi_memtype_t t) - : memory_pool_(pool), processor_(p), type_(t) {} - hsa_amd_memory_pool_t memory() const { return memory_pool_; } - - atmi_memtype_t type() const { return type_; } - -private: - hsa_amd_memory_pool_t memory_pool_; - ATLProcessor processor_; - atmi_memtype_t type_; -}; - -class ATLMachine { -public: - ATLMachine() { - cpu_processors_.clear(); - gpu_processors_.clear(); - } - template void addProcessor(const T &p); - template std::vector &processors(); - -private: - std::vector cpu_processors_; - std::vector gpu_processors_; -}; - -hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc, - const int mem_id); - -#endif // SRC_RUNTIME_INCLUDE_MACHINE_H_ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp b/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp deleted file 
mode 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===--- amdgpu/impl/machine.cpp ---------------------------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "machine.h" -#include "atmi_runtime.h" -#include "hsa_api.h" -#include "internal.h" -#include -#include -#include -#include - -extern ATLMachine g_atl_machine; - -void ATLProcessor::addMemory(const ATLMemory &mem) { - for (auto &mem_obj : memories_) { - // if the memory already exists, then just return - if (mem.memory().handle == mem_obj.memory().handle) - return; - } - memories_.push_back(mem); -} - -const std::vector &ATLProcessor::memories() const { - return memories_; -} - -template <> std::vector &ATLMachine::processors() { - return cpu_processors_; -} - -template <> std::vector &ATLMachine::processors() { - return gpu_processors_; -} - -hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc, - const int mem_id) { - hsa_amd_memory_pool_t pool; - const std::vector &mems = proc.memories(); - assert(mems.size() && mem_id >= 0 && mem_id < mems.size() && - "Invalid memory pools for this processor"); - pool = mems[mem_id].memory(); - return pool; -} - -template <> void ATLMachine::addProcessor(const ATLCPUProcessor &p) { - cpu_processors_.push_back(p); -} - -template <> void ATLMachine::addProcessor(const ATLGPUProcessor &p) { - gpu_processors_.push_back(p); -} diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h --- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h @@ -60,16 +60,12 @@ // data static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t); static 
hsa_status_t Memfree(void *); - static hsa_status_t HostMalloc(void **ptr, size_t size); - static hsa_status_t DeviceMalloc(void **ptr, size_t size, int DeviceId); + static hsa_status_t HostMalloc(void **ptr, size_t size, + hsa_amd_memory_pool_t MemoryPool); int getMaxQueueSize() const { return env_.getMaxQueueSize(); } int getDebugMode() const { return env_.getDebugMode(); } -private: - static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId, - atmi_devtype_t DeviceType); - protected: Runtime() = default; ~Runtime() = default; diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -12,7 +12,6 @@ #include #include "internal.h" -#include "machine.h" #include "rt.h" #include "msgpack.h" @@ -140,241 +139,8 @@ {"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer}, }; -ATLMachine g_atl_machine; - namespace core { -// Implement memory_pool iteration function -static hsa_status_t get_memory_pool_info(hsa_amd_memory_pool_t memory_pool, - void *data) { - ATLProcessor *proc = reinterpret_cast(data); - hsa_status_t err = HSA_STATUS_SUCCESS; - // Check if the memory_pool is allowed to allocate, i.e. 
do not return group - // memory - bool alloc_allowed = false; - err = hsa_amd_memory_pool_get_info( - memory_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, - &alloc_allowed); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Alloc allowed in memory pool check", get_error_string(err)); - return err; - } - if (alloc_allowed) { - uint32_t global_flag = 0; - err = hsa_amd_memory_pool_get_info( - memory_pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Get memory pool info", get_error_string(err)); - return err; - } - if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) { - ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_FINE_GRAINED); - proc->addMemory(new_mem); - } else { - ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_COARSE_GRAINED); - proc->addMemory(new_mem); - } - } - - return err; -} - -static hsa_status_t get_agent_info(hsa_agent_t agent, void *data) { - hsa_status_t err = HSA_STATUS_SUCCESS; - hsa_device_type_t device_type; - err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Get device type info", get_error_string(err)); - return err; - } - switch (device_type) { - case HSA_DEVICE_TYPE_CPU: { - ATLCPUProcessor new_proc(agent); - err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info, - &new_proc); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Iterate all memory pools", get_error_string(err)); - return err; - } - g_atl_machine.addProcessor(new_proc); - } break; - case HSA_DEVICE_TYPE_GPU: { - hsa_profile_t profile; - err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Query the agent profile", 
get_error_string(err)); - return err; - } - atmi_devtype_t gpu_type; - gpu_type = - (profile == HSA_PROFILE_FULL) ? ATMI_DEVTYPE_iGPU : ATMI_DEVTYPE_dGPU; - ATLGPUProcessor new_proc(agent, gpu_type); - err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info, - &new_proc); - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "Iterate all memory pools", get_error_string(err)); - return err; - } - g_atl_machine.addProcessor(new_proc); - } break; - case HSA_DEVICE_TYPE_DSP: { - err = HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } break; - } - - return err; -} - -static hsa_status_t init_compute_and_memory() { - hsa_status_t err; - - /* Iterate over the agents and pick the gpu agent */ - err = hsa_iterate_agents(get_agent_info, NULL); - if (err == HSA_STATUS_INFO_BREAK) { - err = HSA_STATUS_SUCCESS; - } - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Getting a gpu agent", - get_error_string(err)); - return err; - } - - /* Init all devices or individual device types? */ - std::vector &cpu_procs = - g_atl_machine.processors(); - std::vector &gpu_procs = - g_atl_machine.processors(); - /* For CPU memory pools, add other devices that can access them directly - * or indirectly */ - for (auto &cpu_proc : cpu_procs) { - for (auto &cpu_mem : cpu_proc.memories()) { - hsa_amd_memory_pool_t pool = cpu_mem.memory(); - for (auto &gpu_proc : gpu_procs) { - hsa_agent_t agent = gpu_proc.agent(); - hsa_amd_memory_pool_access_t access; - hsa_amd_agent_memory_pool_get_info( - agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access); - if (access != 0) { - // this means not NEVER, but could be YES or NO - // add this memory pool to the proc - gpu_proc.addMemory(cpu_mem); - } - } - } - } - - /* FIXME: are the below combinations of procs and memory pools needed? 
- * all to all compare procs with their memory pools and add those memory - * pools that are accessible by the target procs */ - for (auto &gpu_proc : gpu_procs) { - for (auto &gpu_mem : gpu_proc.memories()) { - hsa_amd_memory_pool_t pool = gpu_mem.memory(); - for (auto &cpu_proc : cpu_procs) { - hsa_agent_t agent = cpu_proc.agent(); - hsa_amd_memory_pool_access_t access; - hsa_amd_agent_memory_pool_get_info( - agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access); - if (access != 0) { - // this means not NEVER, but could be YES or NO - // add this memory pool to the proc - cpu_proc.addMemory(gpu_mem); - } - } - } - } - - size_t num_procs = cpu_procs.size() + gpu_procs.size(); - int num_iGPUs = 0; - int num_dGPUs = 0; - for (uint32_t i = 0; i < gpu_procs.size(); i++) { - if (gpu_procs[i].type() == ATMI_DEVTYPE_iGPU) - num_iGPUs++; - else - num_dGPUs++; - } - assert(num_iGPUs + num_dGPUs == gpu_procs.size() && - "Number of dGPUs and iGPUs do not add up"); - DEBUG_PRINT("CPU Agents: %lu\n", cpu_procs.size()); - DEBUG_PRINT("iGPU Agents: %d\n", num_iGPUs); - DEBUG_PRINT("dGPU Agents: %d\n", num_dGPUs); - DEBUG_PRINT("GPU Agents: %lu\n", gpu_procs.size()); - - int cpus_begin = 0; - int cpus_end = cpu_procs.size(); - int gpus_begin = cpu_procs.size(); - int gpus_end = cpu_procs.size() + gpu_procs.size(); - int proc_index = 0; - for (int i = cpus_begin; i < cpus_end; i++) { - std::vector memories = cpu_procs[proc_index].memories(); - int fine_memories_size = 0; - int coarse_memories_size = 0; - DEBUG_PRINT("CPU memory types:\t"); - for (auto &memory : memories) { - atmi_memtype_t type = memory.type(); - if (type == ATMI_MEMTYPE_FINE_GRAINED) { - fine_memories_size++; - DEBUG_PRINT("Fine\t"); - } else { - coarse_memories_size++; - DEBUG_PRINT("Coarse\t"); - } - } - DEBUG_PRINT("\nFine Memories : %d", fine_memories_size); - DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size); - proc_index++; - } - proc_index = 0; - for (int i = gpus_begin; i < gpus_end; 
i++) { - std::vector memories = gpu_procs[proc_index].memories(); - int fine_memories_size = 0; - int coarse_memories_size = 0; - DEBUG_PRINT("GPU memory types:\t"); - for (auto &memory : memories) { - atmi_memtype_t type = memory.type(); - if (type == ATMI_MEMTYPE_FINE_GRAINED) { - fine_memories_size++; - DEBUG_PRINT("Fine\t"); - } else { - coarse_memories_size++; - DEBUG_PRINT("Coarse\t"); - } - } - DEBUG_PRINT("\nFine Memories : %d", fine_memories_size); - DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size); - proc_index++; - } - if (num_procs > 0) - return HSA_STATUS_SUCCESS; - else - return HSA_STATUS_ERROR_NOT_INITIALIZED; -} - -hsa_status_t init_hsa() { - DEBUG_PRINT("Initializing HSA..."); - hsa_status_t err = hsa_init(); - if (err != HSA_STATUS_SUCCESS) { - return err; - } - - err = init_compute_and_memory(); - if (err != HSA_STATUS_SUCCESS) - return err; - if (err != HSA_STATUS_SUCCESS) { - printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, - "After initializing compute and memory", get_error_string(err)); - return err; - } - - DEBUG_PRINT("done\n"); - return HSA_STATUS_SUCCESS; -} - hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) { #if (ROCM_VERSION_MAJOR >= 3) || \ (ROCM_VERSION_MAJOR >= 2 && ROCM_VERSION_MINOR >= 3) @@ -417,7 +183,7 @@ hsa_status_t atl_init_gpu_context() { hsa_status_t err; - err = init_hsa(); + err = hsa_init(); if (err != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR; diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -32,7 +32,6 @@ #include "Debug.h" #include "get_elf_mach_gfx_name.h" -#include "machine.h" #include "omptargetplugin.h" #include "print_tracing.h" @@ -508,7 +507,8 @@ llvm::omp::AMDGPUGridValues.GV_Default_WG_Size; using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *, - size_t size, hsa_agent_t); + size_t 
size, hsa_agent_t, + hsa_amd_memory_pool_t); hsa_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size, MemcpyFunc Func, int32_t deviceId) { hsa_agent_t agent = HSAAgents[deviceId]; @@ -516,7 +516,7 @@ if (s.handle == 0) { return HSA_STATUS_ERROR; } - hsa_status_t r = Func(s, dest, src, size, agent); + hsa_status_t r = Func(s, dest, src, size, agent, HostFineGrainedMemoryPool); FreeSignalPool.push(s); return r; } @@ -1413,7 +1413,8 @@ static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) { uint64_t rounded = 4 * ((size + 3) / 4); void *ptr; - hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, rounded, DeviceId); + hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId); + hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, rounded, 0, &ptr); if (err != HSA_STATUS_SUCCESS) { return err; } @@ -1807,7 +1808,8 @@ return NULL; } - hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, size, device_id); + hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(device_id); + hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, &ptr); DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size, (long long unsigned)(Elf64_Addr)ptr); ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;