diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h @@ -58,30 +58,6 @@ int device_id; } atmi_place_t; -/** - * @brief ATMI Memory Place - */ -typedef struct atmi_mem_place_s { - /** - * The node in a cluster where computation should occur. - * Default is node_id = 0 for local computations. - */ - unsigned int node_id; - /** - * Device type: CPU, GPU or DSP - */ - atmi_devtype_t dev_type; - /** - * The device ordinal number ordered by runtime; -1 for any - */ - int dev_id; - // atmi_memtype_t mem_type; // Fine grained or Coarse grained - /** - * The memory space/region ordinal number ordered by runtime; -1 for any - */ - int mem_id; -} atmi_mem_place_t; - /** * @brief ATMI Memory Space/region Structure */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp @@ -67,8 +67,8 @@ } void *tempHostPtr; - atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); - hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + hsa_status_t ret = + atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU); if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", size); @@ -97,8 +97,9 @@ } void *tempHostPtr; - atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); - hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + + hsa_status_t ret = + atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU); if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", size); @@ -117,6 +118,7 @@ hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); } -hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) { - return core::Runtime::Malloc(ptr, size, place); +hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId, + atmi_devtype_t DeviceType) { + return core::Runtime::Malloc(ptr, size, DeviceId, DeviceType); } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h @@ -48,8 +48,8 @@ */ hsa_status_t atmi_interop_hsa_get_symbol_info( const std::map &SymbolInfoTable, - atmi_mem_place_t place, const char *symbol, void **var_addr, - unsigned int *var_size); + int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size); + /** * @brief Get the HSA-specific kernel info from a kernel name * @@ -75,8 +75,8 @@ */ hsa_status_t atmi_interop_hsa_get_kernel_info( const std::map &KernelInfoTable, - atmi_mem_place_t place, const char *kernel_name, - hsa_executable_symbol_info_t info, uint32_t *value); + int DeviceId, const char *kernel_name, hsa_executable_symbol_info_t info, + uint32_t *value); /** @} */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp @@ -8,8 +8,7 @@ hsa_status_t atmi_interop_hsa_get_symbol_info( const std::map &SymbolInfoTable, - atmi_mem_place_t place, const char *symbol, void **var_addr, - unsigned int *var_size) { + int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) { /* // Typical usage: void *var_addr; @@ -22,8 +21,8 @@ atmi_machine_t *machine = atmi_machine_get_info(); if (!symbol || !var_addr || !var_size || !machine) return HSA_STATUS_ERROR; - if (place.dev_id < 0 || - place.dev_id >= machine->device_count_by_type[place.dev_type]) + if (DeviceId < 0 || + DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU]) return HSA_STATUS_ERROR; // get the symbol info @@ -43,7 +42,7 @@ hsa_status_t atmi_interop_hsa_get_kernel_info( const std::map &KernelInfoTable, - atmi_mem_place_t place, const char *kernel_name, + int DeviceId, const char *kernel_name, hsa_executable_symbol_info_t kernel_info, uint32_t *value) { /* // Typical usage: @@ -56,8 +55,8 @@ atmi_machine_t *machine = atmi_machine_get_info(); if (!kernel_name || !value || !machine) return HSA_STATUS_ERROR; - if (place.dev_id < 0 || - place.dev_id >= machine->device_count_by_type[place.dev_type]) + if (DeviceId < 0 || + DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU]) return HSA_STATUS_ERROR; hsa_status_t status = HSA_STATUS_SUCCESS; diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h @@ -17,7 +17,6 @@ #ifdef __cplusplus extern "C" { #endif - /** \defgroup module_functions ATMI Module * @{ */ @@ -99,8 +98,8 @@ * @retval ::HSA_STATUS_ERROR The function encountered errors. * */ -hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place); - +hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId, + atmi_devtype_t DeviceType); /** * @brief Frees memory that was previously allocated. * diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -22,39 +22,41 @@ namespace core { namespace { -ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) { - int dev_id = place.dev_id; - switch (place.dev_type) { +ATLProcessor &get_processor_by_mem_place(int DeviceId, + atmi_devtype_t DeviceType) { + switch (DeviceType) { case ATMI_DEVTYPE_CPU: - return g_atl_machine.processors()[dev_id]; + return g_atl_machine.processors()[DeviceId]; case ATMI_DEVTYPE_GPU: - return g_atl_machine.processors()[dev_id]; + return g_atl_machine.processors()[DeviceId]; } } -hsa_amd_memory_pool_t get_memory_pool_by_mem_place(atmi_mem_place_t place) { - ATLProcessor &proc = get_processor_by_mem_place(place); - return get_memory_pool(proc, place.mem_id); +hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId, + atmi_devtype_t DeviceType) { + ATLProcessor &proc = get_processor_by_mem_place(DeviceId, DeviceType); + return get_memory_pool(proc, 0 /*Memory Type (always zero) */); } } // namespace hsa_status_t register_allocation(void *ptr, size_t size, - atmi_mem_place_t place) { - if (place.dev_type == ATMI_DEVTYPE_CPU) + atmi_devtype_t DeviceType) { + if (DeviceType == ATMI_DEVTYPE_CPU) return allow_access_to_all_gpu_agents(ptr); else return HSA_STATUS_SUCCESS; } -hsa_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) { - hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place); +hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId, + atmi_devtype_t DeviceType) { + hsa_amd_memory_pool_t pool = + get_memory_pool_by_mem_place(DeviceId, DeviceType); hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr); DEBUG_PRINT("Malloced [%s %d] %p\n", - place.dev_type == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", place.dev_id, - *ptr); + DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr); if (err == HSA_STATUS_SUCCESS) { - err = register_allocation(*ptr, size, place); + err = register_allocation(*ptr, size, DeviceType); } return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h --- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h @@ -209,8 +209,7 @@ } hsa_status_t register_allocation(void *addr, size_t size, - atmi_mem_place_t place); - + atmi_devtype_t DeviceType); extern bool atl_is_atmi_initialized(); bool handle_group_signal(hsa_signal_value_t value, void *arg); diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h --- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h @@ -10,6 +10,7 @@ #include "hsa.h" #include #include +#include namespace core { @@ -61,7 +62,8 @@ // data static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t); static hsa_status_t Memfree(void *); - static hsa_status_t Malloc(void **, size_t, atmi_mem_place_t); + static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId, + atmi_devtype_t DeviceType); int getMaxQueueSize() const { return env_.getMaxQueueSize(); } int getDebugMode() const { return env_.getDebugMode(); } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -1071,11 +1071,10 @@ return err; } - atmi_mem_place_t place = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu, 0); DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr, info.size); err = register_allocation(reinterpret_cast(info.addr), - (size_t)info.size, place); + (size_t)info.size, ATMI_DEVTYPE_GPU); if (err != HSA_STATUS_SUCCESS) { return err; } diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -246,9 +246,6 @@ static atmi_place_t get_gpu_place(int device_id) { return ATMI_PLACE_GPU(0, device_id); } -static atmi_mem_place_t get_gpu_mem_place(int device_id) { - return ATMI_MEM_PLACE_GPU_MEM(0, device_id, 0); -} static std::vector find_gpu_agents() { std::vector res; @@ -1155,8 +1152,7 @@ void *state_ptr; uint32_t state_ptr_size; hsa_status_t err = atmi_interop_hsa_get_symbol_info( - SymbolInfo, get_gpu_mem_place(device_id), sym(), &state_ptr, - &state_ptr_size); + SymbolInfo, device_id, sym(), &state_ptr, &state_ptr_size); if (err != HSA_STATUS_SUCCESS) { DP("failed to find %s in loaded image\n", sym()); return err; @@ -1176,11 +1172,10 @@ } }; -static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, - atmi_mem_place_t place) { +static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) { uint64_t rounded = 4 * ((size + 3) / 4); void *ptr; - hsa_status_t err = atmi_malloc(&ptr, rounded, place); + hsa_status_t err = atmi_malloc(&ptr, rounded, DeviceId, ATMI_DEVTYPE_GPU); if (err != HSA_STATUS_SUCCESS) { return err; } @@ -1282,8 +1277,8 @@ uint32_t state_ptr_size; auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; hsa_status_t err = atmi_interop_hsa_get_symbol_info( - SymbolInfoMap, get_gpu_mem_place(device_id), - "omptarget_nvptx_device_State", &state_ptr, &state_ptr_size); + SymbolInfoMap, device_id, "omptarget_nvptx_device_State", &state_ptr, + &state_ptr_size); if (err != HSA_STATUS_SUCCESS) { DP("No device_state symbol found, skipping initialization\n"); @@ -1309,8 +1304,7 @@ if (dss.first.get() == nullptr) { assert(dss.second == 0); void *ptr = NULL; - hsa_status_t err = atmi_calloc(&ptr, device_State_bytes, - get_gpu_mem_place(device_id)); + hsa_status_t err = atmi_calloc(&ptr, device_State_bytes, device_id); if (err != HSA_STATUS_SUCCESS) { DP("Failed to allocate device_state array\n"); return NULL; @@ -1367,8 +1361,7 @@ auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; hsa_status_t err = atmi_interop_hsa_get_symbol_info( - SymbolInfoMap, get_gpu_mem_place(device_id), e->name, &varptr, - &varsize); + SymbolInfoMap, device_id, e->name, &varptr, &varsize); if (err != HSA_STATUS_SUCCESS) { // Inform the user what symbol prevented offloading @@ -1407,11 +1400,10 @@ DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name)); - atmi_mem_place_t place = get_gpu_mem_place(device_id); uint32_t kernarg_segment_size; auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id]; hsa_status_t err = atmi_interop_hsa_get_kernel_info( - KernelInfoMap, place, e->name, + KernelInfoMap, device_id, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernarg_segment_size); @@ -1578,7 +1570,7 @@ return NULL; } - hsa_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id)); + hsa_status_t err = atmi_malloc(&ptr, size, device_id, ATMI_DEVTYPE_GPU); DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size, (long long unsigned)(Elf64_Addr)ptr); ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;