diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h
deleted file mode 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*===--------------------------------------------------------------------------
- *              ATMI (Asynchronous Task and Memory Interface)
- *
- * This file is distributed under the MIT License. See LICENSE.txt for details.
- *===------------------------------------------------------------------------*/
-#ifndef INCLUDE_ATMI_INTEROP_HSA_H_
-#define INCLUDE_ATMI_INTEROP_HSA_H_
-
-#include "atmi_runtime.h"
-#include "hsa.h"
-#include "hsa_ext_amd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/** \defgroup interop_hsa_functions ATMI-HSA Interop
- *  @{
- */
-
-/**
- * @brief Get the device address and size of an HSA global symbol
- *
- * @detail Use this function to query the device address and size of an HSA
- * global symbol.
- * The symbol can be set at by the compiler or by the application writer in a
- * language-specific manner. This function is meaningful only after calling one
- * of the @p atmi_module_register functions.
- *
- * @param[in] place The ATMI memory place
- *
- * @param[in] symbol Pointer to a non-NULL global symbol name
- *
- * @param[in] var_addr Pointer to a non-NULL @p void* variable that will
- * hold the device address of the global symbol object.
- *
- * @param[in] var_size Pointer to a non-NULL @p uint variable that will
- * hold the size of the global symbol object.
- *
- * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are
- * invalid
- * location in the current node, or if ATMI is not initialized.
- *
- * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
- */
-atmi_status_t atmi_interop_hsa_get_symbol_info(atmi_mem_place_t place,
-                                               const char *symbol,
-                                               void **var_addr,
-                                               unsigned int *var_size);
-
-/**
- * @brief Get the HSA-specific kernel info from a kernel name
- *
- * @detail Use this function to query the HSA-specific kernel info from the
- * kernel name.
- * This function is meaningful only after calling one
- * of the @p atmi_module_register functions.
- *
- * @param[in] place The ATMI memory place
- *
- * @param[in] kernel_name Pointer to a char array with the kernel name
- *
- * @param[in] info The different possible kernel properties
- *
- * @param[in] value Pointer to a non-NULL @p uint variable that will
- * hold the return value of the kernel property.
- *
- * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are
- * invalid
- * location in the current node, or if ATMI is not initialized.
- *
- * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
- */
-atmi_status_t atmi_interop_hsa_get_kernel_info(
-    atmi_mem_place_t place, const char *kernel_name,
-    hsa_executable_symbol_info_t info, uint32_t *value);
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // INCLUDE_ATMI_INTEROP_HSA_H_
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
@@ -3,15 +3,14 @@
  *
  * This file is distributed under the MIT License. See LICENSE.txt for details.
  *===------------------------------------------------------------------------*/
-#include "atmi_interop_hsa.h"
 #include "internal.h"
 
 using core::atl_is_atmi_initialized;
 
-atmi_status_t atmi_interop_hsa_get_symbol_info(atmi_mem_place_t place,
-                                               const char *symbol,
-                                               void **var_addr,
-                                               unsigned int *var_size) {
+atmi_status_t atmi_interop_hsa_get_symbol_info(
+    std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable,
+    atmi_mem_place_t place, const char *symbol, void **var_addr,
+    unsigned int *var_size) {
   /*
      // Typical usage:
      void *var_addr;
@@ -46,6 +45,7 @@
 }
 
 atmi_status_t atmi_interop_hsa_get_kernel_info(
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
     atmi_mem_place_t place, const char *kernel_name,
     hsa_executable_symbol_info_t kernel_info, uint32_t *value) {
   /*
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -112,9 +112,6 @@
   uint32_t size;
 } atl_symbol_info_t;
 
-extern std::vector<std::map<std::string, atl_kernel_info_t>> KernelInfoTable;
-extern std::vector<std::map<std::string, atl_symbol_info_t>> SymbolInfoTable;
-
 // ---------------------- Kernel End -------------
 
 namespace core {
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -54,12 +54,6 @@
   static atmi_status_t Finalize();
   // machine info
   static atmi_machine_t *GetMachineInfo();
-  // modules
-  static atmi_status_t RegisterModuleFromMemory(
-      void *, size_t, atmi_place_t,
-      atmi_status_t (*on_deserialized_data)(void *data, size_t size,
-                                            void *cb_state),
-      void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
 
   // data
   static atmi_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -132,9 +132,6 @@
 
 std::vector<hsa_amd_memory_pool_t> atl_gpu_kernarg_pools;
 
-std::vector<std::map<std::string, atl_kernel_info_t>> KernelInfoTable;
-std::vector<std::map<std::string, atl_symbol_info_t>> SymbolInfoTable;
-
 bool g_atmi_initialized = false;
 bool g_atmi_hostcall_required = false;
 
@@ -201,15 +198,6 @@
 atmi_status_t Runtime::Finalize() {
   hsa_status_t err;
 
-  for (uint32_t i = 0; i < SymbolInfoTable.size(); i++) {
-    SymbolInfoTable[i].clear();
-  }
-  SymbolInfoTable.clear();
-  for (uint32_t i = 0; i < KernelInfoTable.size(); i++) {
-    KernelInfoTable[i].clear();
-  }
-  KernelInfoTable.clear();
-
   atl_reset_atmi_initialized();
   err = hsa_shut_down();
   if (err != HSA_STATUS_SUCCESS) {
@@ -552,14 +540,6 @@
              "After initializing compute and memory", get_error_string(err));
       exit(1);
     }
-
-    int gpu_count = g_atl_machine.processorCount<ATLGPUProcessor>();
-    KernelInfoTable.resize(gpu_count);
-    SymbolInfoTable.resize(gpu_count);
-    for (uint32_t i = 0; i < SymbolInfoTable.size(); i++)
-      SymbolInfoTable[i].clear();
-    for (uint32_t i = 0; i < KernelInfoTable.size(); i++)
-      KernelInfoTable[i].clear();
     atlc.g_hsa_initialized = true;
     DEBUG_PRINT("done\n");
   }
@@ -833,6 +813,7 @@
 } // namespace
 
 static hsa_status_t get_code_object_custom_metadata(
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
     std::map<std::string, std::string> &KernelNameMap, void *binary,
     size_t binSize, int gpu) {
   // parse code object with different keys from v2
@@ -1006,15 +987,21 @@
 }
 
 struct PopulateInfoTablesContext {
-  PopulateInfoTablesContext(std::map<std::string, std::string> &KernelNameMap,
-                            int Device)
-      : KernelNameMap(KernelNameMap), DeviceId(Device) {}
+  PopulateInfoTablesContext( // binds the three tables by reference
+      std::map<std::string, std::string> &KernelNameMap,
+      std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfo,
+      std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfo,
+      int Device) // consumer reads this back as its gpu index
+      : KernelNameMap(KernelNameMap), KernelInfoTable(KernelInfo),
+        SymbolInfoTable(SymbolInfo), DeviceId(Device) {}
 
   PopulateInfoTablesContext() = delete;
   PopulateInfoTablesContext(const PopulateInfoTablesContext &) = delete;
   ~PopulateInfoTablesContext() = default;
 
   std::map<std::string, std::string> &KernelNameMap;
+  std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable;
+  std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable;
   int DeviceId;
 };
 
@@ -1023,8 +1010,11 @@
                                        void *data) {
   PopulateInfoTablesContext *Ctx =
       static_cast<PopulateInfoTablesContext *>(data);
-  int gpu = Ctx->DeviceId;
   auto &KernelNameMap = Ctx->KernelNameMap;
+  auto &KernelInfoTable = Ctx->KernelInfoTable;
+  auto &SymbolInfoTable = Ctx->SymbolInfoTable;
+  int gpu = Ctx->DeviceId;
+
   hsa_symbol_kind_t type;
 
   uint32_t name_length;
@@ -1171,7 +1161,9 @@
   return HSA_STATUS_SUCCESS;
 }
 
-atmi_status_t Runtime::RegisterModuleFromMemory(
+atmi_status_t RegisterModuleFromMemory(
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
+    std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable,
     void *module_bytes, size_t module_size, atmi_place_t place,
     atmi_status_t (*on_deserialized_data)(void *data, size_t size,
                                           void *cb_state),
@@ -1212,8 +1204,8 @@
       // Some metadata info is not available through ROCr API, so use custom
       // code object metadata parsing to collect such metadata info
 
-      err = get_code_object_custom_metadata(KernelNameMap, module_bytes,
-                                            module_size, gpu);
+      err = core::get_code_object_custom_metadata(
+          KernelInfoTable, KernelNameMap, module_bytes, module_size, gpu);
       if (err != HSA_STATUS_SUCCESS) {
         DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
                     "Getting custom code object metadata",
@@ -1267,7 +1259,8 @@
       exit(1);
     }
 
-    PopulateInfoTablesContext Ctx(KernelNameMap, gpu);
+    PopulateInfoTablesContext Ctx(KernelNameMap, KernelInfoTable,
+                                  SymbolInfoTable, gpu);
     err = hsa_executable_iterate_symbols(executable, PopulateInfoTables,
                                          static_cast<void *>(&Ctx));
     if (err != HSA_STATUS_SUCCESS) {
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -30,7 +30,6 @@
 #include <vector>
 
 // Header from ATMI interface
-#include "atmi_interop_hsa.h"
 #include "atmi_runtime.h"
 
 #include "internal.h"
@@ -87,6 +86,25 @@
 
 #include "elf_common.h"
 
+atmi_status_t atmi_interop_hsa_get_symbol_info(
+    std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable,
+    atmi_mem_place_t place, const char *symbol, void **var_addr,
+    unsigned int *var_size); // defined in impl/atmi_interop_hsa.cpp
+atmi_status_t atmi_interop_hsa_get_kernel_info(
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
+    atmi_mem_place_t place, const char *kernel_name,
+    hsa_executable_symbol_info_t info, uint32_t *value); // defined in same file
+
+namespace core {
+atmi_status_t RegisterModuleFromMemory( // defined in impl/system.cpp
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
+    std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable,
+    void *, size_t, atmi_place_t,
+    atmi_status_t (*on_deserialized_data)(void *data, size_t size,
+                                          void *cb_state),
+    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
+} // namespace core
+
 /// Keep entries table per device
 struct FuncOrGblEntryTy {
   __tgt_target_table Table;
@@ -330,6 +348,9 @@
 
   std::vector<hsa_executable_t> HSAExecutables;
 
+  std::vector<std::map<std::string, atl_kernel_info_t>> KernelInfoTable;
+  std::vector<std::map<std::string, atl_symbol_info_t>> SymbolInfoTable;
+
   struct atmiFreePtrDeletor {
     void operator()(void *p) {
       atmi_free(p); // ignore failure to free
@@ -474,6 +495,13 @@
     NumThreads.resize(NumberOfDevices);
     deviceStateStore.resize(NumberOfDevices);
 
+    KernelInfoTable.resize(NumberOfDevices);
+    SymbolInfoTable.resize(NumberOfDevices);
+    for (auto &PerDeviceSymbols : SymbolInfoTable)
+      PerDeviceSymbols.clear(); // every device starts with an empty map
+    for (auto &PerDeviceKernels : KernelInfoTable)
+      PerDeviceKernels.clear();
+
     for (int i = 0; i < NumberOfDevices; i++) {
       uint32_t queue_size = 0;
       {
@@ -558,6 +586,8 @@
     // atmi_finalize removes access to it
     deviceStateStore.clear();
     KernelArgPoolMap.clear();
+    KernelInfoTable.clear();
+    SymbolInfoTable.clear();
     // Terminate hostrpc before finalizing ATMI
     hostrpc_terminate();
 
@@ -988,15 +1018,17 @@
 
 template <typename C>
 atmi_status_t module_register_from_memory_to_place(
+    std::vector<std::map<std::string, atl_kernel_info_t>> &KernelInfoTable,
+    std::vector<std::map<std::string, atl_symbol_info_t>> &SymbolInfoTable,
     void *module_bytes, size_t module_size, atmi_place_t place, C cb,
     std::vector<hsa_executable_t> &HSAExecutables) {
   auto L = [](void *data, size_t size, void *cb_state) -> atmi_status_t {
     C *unwrapped = static_cast<C *>(cb_state);
     return (*unwrapped)(data, size);
   };
-  return core::Runtime::RegisterModuleFromMemory(
-      module_bytes, module_size, place, L, static_cast<void *>(&cb),
-      HSAExecutables);
+  return core::RegisterModuleFromMemory( // free function, not a Runtime member
+      KernelInfoTable, SymbolInfoTable, module_bytes, module_size, place, L,
+      static_cast<void *>(&cb), HSAExecutables); // L unwraps this back to C *
 }
 } // namespace
 
@@ -1116,7 +1148,8 @@
         void *state_ptr;
         uint32_t state_ptr_size;
         atmi_status_t err = atmi_interop_hsa_get_symbol_info(
-            get_gpu_mem_place(device_id), sym(), &state_ptr, &state_ptr_size);
+            DeviceInfo.SymbolInfoTable, get_gpu_mem_place(device_id), sym(),
+            &state_ptr, &state_ptr_size);
         if (err != ATMI_STATUS_SUCCESS) {
           DP("failed to find %s in loaded image\n", sym());
           return err;
@@ -1196,6 +1229,7 @@
                                   img_size);
 
     atmi_status_t err = module_register_from_memory_to_place(
+        DeviceInfo.KernelInfoTable, DeviceInfo.SymbolInfoTable,
         (void *)image->ImageStart, img_size, get_gpu_place(device_id),
         [&](void *data, size_t size) { return env.before_loading(data, size); },
         DeviceInfo.HSAExecutables);
@@ -1226,8 +1260,8 @@
     void *state_ptr;
     uint32_t state_ptr_size;
     atmi_status_t err = atmi_interop_hsa_get_symbol_info(
-        get_gpu_mem_place(device_id), "omptarget_nvptx_device_State",
-        &state_ptr, &state_ptr_size);
+        DeviceInfo.SymbolInfoTable, get_gpu_mem_place(device_id),
+        "omptarget_nvptx_device_State", &state_ptr, &state_ptr_size);
 
     if (err != ATMI_STATUS_SUCCESS) {
       DP("No device_state symbol found, skipping initialization\n");
@@ -1310,7 +1344,8 @@
       uint32_t varsize;
 
       atmi_status_t err = atmi_interop_hsa_get_symbol_info(
-          get_gpu_mem_place(device_id), e->name, &varptr, &varsize);
+          DeviceInfo.SymbolInfoTable, get_gpu_mem_place(device_id), e->name,
+          &varptr, &varsize);
 
       if (err != ATMI_STATUS_SUCCESS) {
         // Inform the user what symbol prevented offloading
@@ -1352,7 +1387,8 @@
     atmi_mem_place_t place = get_gpu_mem_place(device_id);
     uint32_t kernarg_segment_size;
     atmi_status_t err = atmi_interop_hsa_get_kernel_info(
-        place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+        DeviceInfo.KernelInfoTable, place, e->name,
+        HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
         &kernarg_segment_size);
 
     // each arg is a void * in this openmp implementation
@@ -1780,6 +1816,7 @@
   KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
 
   std::string kernel_name = std::string(KernelInfo->Name);
+  auto &KernelInfoTable = DeviceInfo.KernelInfoTable;
   if (KernelInfoTable[device_id].find(kernel_name) ==
       KernelInfoTable[device_id].end()) {
     DP("Kernel %s not found\n", kernel_name.c_str());