diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h @@ -39,25 +39,6 @@ * @{ */ -/** - * @brief ATMI Compute Place - */ -typedef struct atmi_place_s { - /** - * The node in a cluster where computation should occur. - * Default is node_id = 0 for local computations. - */ - unsigned int node_id; - /** - * Device type: CPU, GPU or DSP - */ - atmi_devtype_t type; - /** - * The device ordinal number ordered by runtime; -1 for any - */ - int device_id; -} atmi_place_t; - /** * @brief ATMI Memory Space/region Structure */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h @@ -51,7 +51,7 @@ * */ hsa_status_t atmi_module_register_from_memory_to_place( - void *module_bytes, size_t module_size, atmi_place_t place, + void *module_bytes, size_t module_size, int DeviceId, hsa_status_t (*on_deserialized_data)(void *data, size_t size, void *cb_state), void *cb_state); diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.h b/openmp/libomptarget/plugins/amdgpu/impl/machine.h --- a/openmp/libomptarget/plugins/amdgpu/impl/machine.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/machine.h @@ -79,8 +79,7 @@ const int mem_id); extern ATLMachine g_atl_machine; -template T &get_processor(atmi_place_t place) { - int dev_id = place.device_id; +template T &get_processor(int dev_id) { if (dev_id == -1) { // user is asking runtime to pick a device // best device of this type? pick 0 for now diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h --- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h @@ -51,7 +51,7 @@ // modules static hsa_status_t RegisterModuleFromMemory( - void *, size_t, atmi_place_t, + void *, size_t, int DeviceId, hsa_status_t (*on_deserialized_data)(void *data, size_t size, void *cb_state), void *cb_state, std::vector &HSAExecutables); diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -491,8 +491,7 @@ std::vector gpu_agents; int gpu_count = g_atl_machine.processorCount(); for (int gpu = 0; gpu < gpu_count; gpu++) { - atmi_place_t place = ATMI_PLACE_GPU(0, gpu); - ATLGPUProcessor &proc = get_processor(place); + ATLGPUProcessor &proc = get_processor(gpu); gpu_agents.push_back(proc.agent()); } atlc.g_tasks_initialized = true; @@ -1069,16 +1068,15 @@ hsa_status_t RegisterModuleFromMemory( std::map &KernelInfoTable, std::map &SymbolInfoTable, - void *module_bytes, size_t module_size, atmi_place_t place, + void *module_bytes, size_t module_size, int gpu, hsa_status_t (*on_deserialized_data)(void *data, size_t size, void *cb_state), void *cb_state, std::vector &HSAExecutables) { hsa_status_t err; - int gpu = place.device_id; assert(gpu >= 0); DEBUG_PRINT("Trying to load module to GPU-%d\n", gpu); - ATLGPUProcessor &proc = get_processor(place); + ATLGPUProcessor &proc = get_processor(gpu); hsa_agent_t agent = proc.agent(); hsa_executable_t executable = {0}; hsa_profile_t agent_profile; diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -90,7 +90,7 @@ hsa_status_t RegisterModuleFromMemory( std::map &KernelInfo, std::map &SymbolInfoTable, void *, size_t, - atmi_place_t, + int DeviceId, hsa_status_t (*on_deserialized_data)(void *data, size_t size, void *cb_state), void *cb_state, std::vector &HSAExecutables); @@ -242,11 +242,6 @@ /// FIXME: we may need this to be per device and per library. std::list KernelsList; -// ATMI API to get gpu and gpu memory place -static atmi_place_t get_gpu_place(int device_id) { - return ATMI_PLACE_GPU(0, device_id); -} - static std::vector find_gpu_agents() { std::vector res; @@ -1025,14 +1020,14 @@ hsa_status_t module_register_from_memory_to_place( std::map &KernelInfoTable, std::map &SymbolInfoTable, - void *module_bytes, size_t module_size, atmi_place_t place, C cb, + void *module_bytes, size_t module_size, int DeviceId, C cb, std::vector &HSAExecutables) { auto L = [](void *data, size_t size, void *cb_state) -> hsa_status_t { C *unwrapped = static_cast(cb_state); return (*unwrapped)(data, size); }; return core::RegisterModuleFromMemory( - KernelInfoTable, SymbolInfoTable, module_bytes, module_size, place, L, + KernelInfoTable, SymbolInfoTable, module_bytes, module_size, DeviceId, L, static_cast(&cb), HSAExecutables); } } // namespace @@ -1239,8 +1234,7 @@ auto &KernelInfo = DeviceInfo.KernelInfoTable[device_id]; auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; hsa_status_t err = module_register_from_memory_to_place( - KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size, - get_gpu_place(device_id), + KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size, device_id, [&](void *data, size_t size) { if (image_contains_symbol(data, size, "needs_hostcall_buffer")) { __atomic_store_n(&DeviceInfo.hostcall_required, true,