diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -150,7 +150,6 @@ std::vector<std::map<std::string, atl_symbol_info_t>> SymbolInfoTable; bool g_atmi_initialized = false; -bool g_atmi_hostcall_required = false; /* atlc is all internal global values. @@ -1137,8 +1136,6 @@ return err; } SymbolInfoTable[gpu][std::string(name)] = info; - if (strcmp(name, "needs_hostcall_buffer") == 0) - g_atmi_hostcall_required = true; free(name); } else { DEBUG_PRINT("Symbol is an indirect function\n"); diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -335,6 +335,8 @@ // Resource pools SignalPoolT FreeSignalPool; + bool hostcall_required = false; + std::vector<hsa_executable_t> HSAExecutables; struct atmiFreePtrDeletor { @@ -1158,6 +1160,12 @@ return ATMI_STATUS_SUCCESS; } +static bool image_contains_symbol(void *data, size_t size, const char *sym) { + symbol_info si; + int rc = get_symbol_info_without_loading((char *)data, size, sym, &si); + return (rc == 0) && (si.addr != nullptr); +} + __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image) { // This function loads the device image onto gpu[device_id] and does other @@ -1199,7 +1207,13 @@ atmi_status_t err = module_register_from_memory_to_place( (void *)image->ImageStart, img_size, get_gpu_place(device_id), - [&](void *data, size_t size) { return env.before_loading(data, size); }, + [&](void *data, size_t size) { + if (image_contains_symbol(data, size, "needs_hostcall_buffer")) { + __atomic_store_n(&DeviceInfo.hostcall_required, true, + __ATOMIC_RELEASE); + } + return env.before_loading(data, size); + }, DeviceInfo.HSAExecutables); check("Module registering", err); @@ -1735,8 +1749,6 @@ return packet_id; 
} -extern bool g_atmi_hostcall_required; // declared without header by atmi - static int32_t __tgt_rtl_run_target_team_region_locked( int32_t device_id, void *tgt_entry_ptr, void **tgt_args, ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams, @@ -1905,7 +1917,7 @@ impl_args->offset_z = 0; // assign a hostcall buffer for the selected Q - if (g_atmi_hostcall_required) { + if (__atomic_load_n(&DeviceInfo.hostcall_required, __ATOMIC_ACQUIRE)) { // hostrpc_assign_buffer is not thread safe, and this function is // under a multiple reader lock, not a writer lock. static pthread_mutex_t hostcall_init_lock = PTHREAD_MUTEX_INITIALIZER;