diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -20,7 +20,6 @@
 extern ATLMachine g_atl_machine;
 
 namespace core {
-void allow_access_to_all_gpu_agents(void *ptr);
 
 namespace {
 ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) {
@@ -39,45 +38,33 @@
 }
 } // namespace
 
-void register_allocation(void *ptr, size_t size, atmi_mem_place_t place) {
+hsa_status_t register_allocation(void *ptr, size_t size,
+                                 atmi_mem_place_t place) {
   if (place.dev_type == ATMI_DEVTYPE_CPU)
-    allow_access_to_all_gpu_agents(ptr);
+    return allow_access_to_all_gpu_agents(ptr);
+  else
+    return HSA_STATUS_SUCCESS;
 }
 
 atmi_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) {
-  atmi_status_t ret = ATMI_STATUS_SUCCESS;
   hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place);
   hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "atmi_malloc",
-           get_error_string(err));
-    exit(1);
-  }
   DEBUG_PRINT("Malloced [%s %d] %p\n",
               place.dev_type == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", place.dev_id,
               *ptr);
-  if (err != HSA_STATUS_SUCCESS)
-    ret = ATMI_STATUS_ERROR;
 
-  register_allocation(*ptr, size, place);
+  if (err == HSA_STATUS_SUCCESS) {
+    err = register_allocation(*ptr, size, place);
+  }
 
-  return ret;
+  return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR;
 }
 
 atmi_status_t Runtime::Memfree(void *ptr) {
-  atmi_status_t ret = ATMI_STATUS_SUCCESS;
-  hsa_status_t err;
-  err = hsa_amd_memory_pool_free(ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "atmi_free",
-           get_error_string(err));
-    exit(1);
-  }
+  hsa_status_t err = hsa_amd_memory_pool_free(ptr);
   DEBUG_PRINT("Freed %p\n", ptr);
 
-  if (err != HSA_STATUS_SUCCESS)
-    ret = ATMI_STATUS_ERROR;
-  return ret;
+  return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR;
 }
 
 } // namespace core
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -48,12 +48,6 @@
 extern "C" {
 #endif
 
-#define check(msg, status)                                                     \
-  if (status != HSA_STATUS_SUCCESS) {                                          \
-    printf("%s failed.\n", #msg);                                              \
-    exit(1);                                                                   \
-  }
-
 #ifdef DEBUG
 #define DEBUG_PRINT(fmt, ...)                                                  \
   if (core::Runtime::getInstance().getDebugMode()) {                           \
@@ -217,15 +211,14 @@
       alignDown((intptr_t)(value + alignment - 1), alignment));
 }
 
-extern void register_allocation(void *addr, size_t size,
-                                atmi_mem_place_t place);
+hsa_status_t register_allocation(void *addr, size_t size,
+                                 atmi_mem_place_t place);
 
 extern bool atl_is_atmi_initialized();
 
 bool handle_group_signal(hsa_signal_value_t value, void *arg);
 
-
-void allow_access_to_all_gpu_agents(void *ptr);
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
 } // namespace core
 
 const char *get_error_string(hsa_status_t err);
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -166,20 +166,14 @@
 
 bool atl_is_atmi_initialized() { return g_atmi_initialized; }
 
-void allow_access_to_all_gpu_agents(void *ptr) {
-  hsa_status_t err;
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
   std::vector<ATLGPUProcessor> &gpu_procs =
       g_atl_machine.processors<ATLGPUProcessor>();
   std::vector<hsa_agent_t> agents;
   for (uint32_t i = 0; i < gpu_procs.size(); i++) {
     agents.push_back(gpu_procs[i].agent());
   }
-  err = hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Allow agents ptr access", get_error_string(err));
-    exit(1);
-  }
+  return hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr);
 }
 
 atmi_status_t Runtime::Initialize() {
@@ -188,10 +182,11 @@
     return ATMI_STATUS_SUCCESS;
 
   if (devtype == ATMI_DEVTYPE_ALL || devtype & ATMI_DEVTYPE_GPU) {
-    if (atl_init_gpu_context() != ATMI_STATUS_SUCCESS) {
+    atmi_status_t rc = atl_init_gpu_context();
+    if (rc != ATMI_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "GPU context init",
              get_atmi_error_string(atl_init_gpu_context()));
-      exit(1);
+      return rc;
     }
   }
 
@@ -200,8 +195,7 @@
 }
 
 atmi_status_t Runtime::Finalize() {
-  hsa_status_t err;
-
+  atmi_status_t rc = ATMI_STATUS_SUCCESS;
   for (uint32_t i = 0; i < SymbolInfoTable.size(); i++) {
     SymbolInfoTable[i].clear();
   }
@@ -212,14 +206,14 @@
   KernelInfoTable.clear();
 
   atl_reset_atmi_initialized();
-  err = hsa_shut_down();
+  hsa_status_t err = hsa_shut_down();
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Shutting down HSA",
            get_error_string(err));
-    exit(1);
+    rc = ATMI_STATUS_ERROR;
   }
 
-  return ATMI_STATUS_SUCCESS;
+  return rc;
 }
 
 static void atmi_init_context_structs() {
@@ -243,7 +237,7 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Alloc allowed in memory pool check", get_error_string(err));
-    exit(1);
+    return err;
   }
   if (alloc_allowed) {
     uint32_t global_flag = 0;
@@ -252,7 +246,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Get memory pool info", get_error_string(err));
-      exit(1);
+      return err;
     }
     if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) {
       ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_FINE_GRAINED);
@@ -277,29 +271,27 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Get device type info", get_error_string(err));
-    exit(1);
+    return err;
   }
   switch (device_type) {
   case HSA_DEVICE_TYPE_CPU: {
-    ;
     ATLCPUProcessor new_proc(agent);
     err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
                                              &new_proc);
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterate all memory pools", get_error_string(err));
-      exit(1);
+      return err;
     }
     g_atl_machine.addProcessor(new_proc);
   } break;
   case HSA_DEVICE_TYPE_GPU: {
-    ;
     hsa_profile_t profile;
     err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile);
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Query the agent profile", get_error_string(err));
-      exit(1);
+      return err;
     }
     atmi_devtype_t gpu_type;
     gpu_type =
@@ -310,7 +302,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterate all memory pools", get_error_string(err));
-      exit(1);
+      return err;
     }
     g_atl_machine.addProcessor(new_proc);
   } break;
@@ -368,10 +360,8 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Getting a gpu agent",
            get_error_string(err));
-    exit(1);
-  }
-  if (err != HSA_STATUS_SUCCESS)
     return err;
+  }
 
   /* Init all devices or individual device types? */
   std::vector<ATLCPUProcessor> &cpu_procs =
@@ -510,7 +500,7 @@
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Finding a CPU kernarg memory region handle",
              get_error_string(err));
-      exit(1);
+      return err;
     }
   }
   hsa_region_t atl_gpu_kernarg_region;
@@ -524,7 +514,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Finding a kernarg memory region", get_error_string(err));
-      exit(1);
+      return err;
     }
   }
   if (num_procs > 0)
@@ -540,7 +530,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Initializing the hsa runtime", get_error_string(err));
-      exit(1);
+      return err;
     }
     if (err != HSA_STATUS_SUCCESS)
       return err;
@@ -551,7 +541,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "After initializing compute and memory", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     int gpu_count = g_atl_machine.processorCount<ATLGPUProcessor>();
@@ -635,7 +625,7 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Registering the system for memory faults", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
 
   init_tasks();
@@ -1018,7 +1008,7 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Symbol info extraction", get_error_string(err));
-    exit(1);
+    return err;
   }
   DEBUG_PRINT("Exec Symbol type: %d\n", type);
   if (type == HSA_SYMBOL_KIND_KERNEL) {
@@ -1027,7 +1017,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     char *name = reinterpret_cast<char *>(malloc(name_length + 1));
     err = hsa_executable_symbol_get_info(symbol,
@@ -1035,7 +1025,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     name[name_length] = 0;
 
@@ -1043,12 +1033,7 @@
       // did not find kernel name in the kernel map; this can happen only
       // if the ROCr API for getting symbol info (name) is different from
       // the comgr method of getting symbol info
-      if (HSA_STATUS_ERROR_INVALID_CODE_OBJECT != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Invalid kernel name",
-               get_error_string(HSA_STATUS_ERROR_INVALID_CODE_OBJECT));
-        exit(1);
-      }
+      return HSA_STATUS_ERROR;
     }
     atl_kernel_info_t info;
     std::string kernelName = KernelNameMap[std::string(name)];
@@ -1056,12 +1041,7 @@
     // because the non-ROCr custom code object parsing is called before
     // iterating over the code object symbols using ROCr
     if (KernelInfoTable[gpu].find(kernelName) == KernelInfoTable[gpu].end()) {
-      if (HSA_STATUS_ERROR_INVALID_CODE_OBJECT != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Finding the entry kernel info table",
-               get_error_string(HSA_STATUS_ERROR_INVALID_CODE_OBJECT));
-        exit(1);
-      }
+      return HSA_STATUS_ERROR;
     }
     // found, so assign and update
     info = KernelInfoTable[gpu][kernelName];
@@ -1074,7 +1054,7 @@
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the symbol from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
     err = hsa_executable_symbol_get_info(
         symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
@@ -1083,7 +1063,7 @@
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the group segment size from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
     err = hsa_executable_symbol_get_info(
         symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
@@ -1092,7 +1072,7 @@
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the private segment from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
 
     DEBUG_PRINT(
@@ -1110,7 +1090,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     char *name = reinterpret_cast<char *>(malloc(name_length + 1));
     err = hsa_executable_symbol_get_info(symbol,
@@ -1118,7 +1098,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     name[name_length] = 0;
 
@@ -1129,7 +1109,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info address extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     err = hsa_executable_symbol_get_info(
@@ -1137,14 +1117,17 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info size extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     atmi_mem_place_t place = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu, 0);
     DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
                 info.size);
-    register_allocation(reinterpret_cast<void *>(info.addr), (size_t)info.size,
-                        place);
+    err = register_allocation(reinterpret_cast<void *>(info.addr),
+                              (size_t)info.size, place);
+    if (err != HSA_STATUS_SUCCESS) {
+      return err;
+    }
     SymbolInfoTable[gpu][std::string(name)] = info;
     if (strcmp(name, "needs_hostcall_buffer") == 0)
       g_atmi_hostcall_required = true;
@@ -1174,7 +1157,7 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Query the agent profile", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
   // FIXME: Assume that every profile is FULL until we understand how to build
   // GCN with base profile
@@ -1185,7 +1168,7 @@
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Create the executable", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
 
   bool module_load_success = false;
@@ -1223,7 +1206,7 @@
         printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
                "Error in deserialized_data callback",
                get_atmi_error_string(atmi_err));
-        exit(1);
+        return atmi_err;
       }
 
       /* Load the code object.  */
@@ -1246,7 +1229,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Freeze the executable", get_error_string(err));
-      exit(1);
+      return ATMI_STATUS_ERROR;
     }
 
     err = hsa_executable_iterate_symbols(executable, populate_InfoTables,
@@ -1254,7 +1237,7 @@
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterating over symbols for execuatable", get_error_string(err));
-      exit(1);
+      return ATMI_STATUS_ERROR;
     }
 
     // save the executable and destroy during finalize
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
@@ -99,7 +99,6 @@
     std::cout << "ATMI_MAX_HSA_QUEUE_SIZE : positive integer" << std::endl
               << "ATMI_DEBUG : 1 for printing out trace/debug info"
               << std::endl;
-    exit(0);
   }
 
   var = GetEnv("ATMI_MAX_HSA_QUEUE_SIZE");
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -72,7 +72,6 @@
 
 int print_kernel_trace;
 
-#undef check // Drop definition from internal.h
 #ifdef OMPTARGET_DEBUG
 #define check(msg, status)                                                     \
   if (status != ATMI_STATUS_SUCCESS) {                                         \
@@ -116,11 +115,8 @@
   ~KernelArgPool() {
     if (kernarg_region) {
       auto r = hsa_amd_memory_pool_free(kernarg_region);
-      assert(r == HSA_STATUS_SUCCESS);
       if (r != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Memory pool free", get_error_string(r));
-        exit(1);
+        DP("hsa_amd_memory_pool_free failed: %s\n", get_error_string(r));
       }
     }
   }
@@ -135,22 +131,33 @@
 
     // atmi uses one pool per kernel for all gpus, with a fixed upper size
     // preserving that exact scheme here, including the queue<int>
-    {
-      hsa_status_t err = hsa_amd_memory_pool_allocate(
-          atl_gpu_kernarg_pools[0],
-          kernarg_size_including_implicit() * MAX_NUM_KERNELS, 0,
-          &kernarg_region);
-      if (err != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Allocating memory for the executable-kernel",
-               get_error_string(err));
-        exit(1);
-      }
-      core::allow_access_to_all_gpu_agents(kernarg_region);
 
-      for (int i = 0; i < MAX_NUM_KERNELS; i++) {
-        free_kernarg_segments.push(i);
+    hsa_status_t err = hsa_amd_memory_pool_allocate(
+        atl_gpu_kernarg_pools[0],
+        kernarg_size_including_implicit() * MAX_NUM_KERNELS, 0,
+        &kernarg_region);
+
+    if (err != HSA_STATUS_SUCCESS) {
+      DP("hsa_amd_memory_pool_allocate failed: %s\n", get_error_string(err));
+      kernarg_region = nullptr; // paranoid
+      return;
+    }
+
+    err = core::allow_access_to_all_gpu_agents(kernarg_region);
+    if (err != HSA_STATUS_SUCCESS) {
+      DP("hsa allow_access_to_all_gpu_agents failed: %s\n",
+         get_error_string(err));
+      auto r = hsa_amd_memory_pool_free(kernarg_region);
+      if (r != HSA_STATUS_SUCCESS) {
+        // if free failed, can't do anything more to resolve it
+        DP("hsa memory poll free failed: %s\n", get_error_string(err));
       }
+      kernarg_region = nullptr;
+      return;
+    }
+
+    for (int i = 0; i < MAX_NUM_KERNELS; i++) {
+      free_kernarg_segments.push(i);
     }
   }
 
@@ -474,17 +481,18 @@
     NumThreads.resize(NumberOfDevices);
     deviceStateStore.resize(NumberOfDevices);
 
+    for (int i = 0; i < NumberOfDevices; i++) {
+      HSAQueues[i] = nullptr;
+    }
+
     for (int i = 0; i < NumberOfDevices; i++) {
       uint32_t queue_size = 0;
       {
-        hsa_status_t err;
-        err = hsa_agent_get_info(HSAAgents[i], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
-                                 &queue_size);
+        hsa_status_t err = hsa_agent_get_info(
+            HSAAgents[i], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
         if (err != HSA_STATUS_SUCCESS) {
-          printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-                 "Querying the agent maximum queue size",
-                 get_error_string(err));
-          exit(1);
+          DP("HSA query QUEUE_MAX_SIZE failed for agent %d\n", i);
+          return;
         }
         if (queue_size > core::Runtime::getInstance().getMaxQueueSize()) {
           queue_size = core::Runtime::getInstance().getMaxQueueSize();
@@ -495,7 +503,7 @@
           HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI, callbackQueue, NULL,
           UINT32_MAX, UINT32_MAX, &HSAQueues[i]);
       if (rc != HSA_STATUS_SUCCESS) {
-        DP("Failed to create HSA queues\n");
+        DP("Failed to create HSA queue %d\n", i);
         return;
       }
 
@@ -540,18 +548,6 @@
     RequiresFlags = OMP_REQ_UNDEFINED;
   }
 
-  void DestroyHSAExecutables() {
-    hsa_status_t Err;
-    for (uint32_t I = 0; I < HSAExecutables.size(); I++) {
-      Err = hsa_executable_destroy(HSAExecutables[I]);
-      if (Err != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Destroying executable", get_error_string(Err));
-        return;
-      }
-    }
-  }
-
   ~RTLDeviceInfoTy() {
     DP("Finalizing the HSA-ATMI DeviceInfo.\n");
     // Run destructors on types that use HSA before
@@ -561,7 +557,14 @@
     // Terminate hostrpc before finalizing ATMI
     hostrpc_terminate();
 
-    DestroyHSAExecutables();
+    for (uint32_t I = 0; I < HSAExecutables.size(); I++) {
+      hsa_status_t Err = hsa_executable_destroy(HSAExecutables[I]);
+      if (Err != HSA_STATUS_SUCCESS) {
+        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+           "Destroying executable", get_error_string(Err));
+      }
+    }
+
     atmi_finalize();
   }
 };
@@ -1010,9 +1013,8 @@
 
     if (rc == 0) {
       if (size_si.size != sizeof(uint64_t)) {
-        fprintf(stderr,
-                "Found device_State_size variable with wrong size, aborting\n");
-        exit(1);
+        DP("Found device_State_size variable with wrong size\n");
+        return 0;
       }
 
       // Read number of bytes directly from the elf
@@ -1828,6 +1830,9 @@
   // Run on the device.
   {
     hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id];
+    if (!queue) {
+      return OFFLOAD_FAIL;
+    }
     uint64_t packet_id = acquire_available_packet_id(queue);
 
     const uint32_t mask = queue->size - 1; // size is a power of 2
@@ -1867,8 +1872,8 @@
       }
     }
     if (!ArgPool) {
-      fprintf(stderr, "Warning: No ArgPool for %s on device %d\n",
-              KernelInfo->Name, device_id);
+      DP("Warning: No ArgPool for %s on device %d\n", KernelInfo->Name,
+         device_id);
     }
     {
       void *kernarg = nullptr;
@@ -1877,8 +1882,8 @@
         kernarg = ArgPool->allocate(arg_num);
       }
       if (!kernarg) {
-        printf("Allocate kernarg failed\n");
-        exit(1);
+        DP("Allocate kernarg failed\n");
+        return OFFLOAD_FAIL;
       }
 
       // Copy explicit arguments
@@ -1919,8 +1924,8 @@
     {
       hsa_signal_t s = DeviceInfo.FreeSignalPool.pop();
       if (s.handle == 0) {
-        printf("Failed to get signal instance\n");
-        exit(1);
+        DP("Failed to get signal instance\n");
+        return OFFLOAD_FAIL;
       }
       packet->completion_signal = s;
       hsa_signal_store_relaxed(packet->completion_signal, 1);