diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h
@@ -50,25 +50,6 @@
   ATMI_MEMTYPE_COARSE_GRAINED = 1,
   ATMI_MEMTYPE_ANY
 } atmi_memtype_t;
-
-/**
- * @brief ATMI Memory Fences for Tasks.
- */
-typedef enum atmi_task_fence_scope_s {
-  /**
-   * No memory fence applied; external fences have to be applied around the task
-   * launch/completion.
-   */
-  ATMI_FENCE_SCOPE_NONE = 0,
-  /**
-   * The fence is applied to the device.
-   */
-  ATMI_FENCE_SCOPE_DEVICE = 1,
-  /**
-   * The fence is applied to the entire system.
-   */
-  ATMI_FENCE_SCOPE_SYSTEM = 2
-} atmi_task_fence_scope_t;
 
 /** @} */
 
@@ -164,20 +145,9 @@
 
 // Below are some helper macros that can be used to setup
 // some of the ATMI data structures.
-#define ATMI_PLACE_CPU(node, cpu_id)                                           \
-  { .node_id = node, .type = ATMI_DEVTYPE_CPU, .device_id = cpu_id }
 #define ATMI_PLACE_GPU(node, gpu_id)                                           \
   { .node_id = node, .type = ATMI_DEVTYPE_GPU, .device_id = gpu_id }
-#define ATMI_MEM_PLACE_CPU(node, cpu_id)                                       \
-  {                                                                            \
-    .node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id,           \
-    .mem_id = -1                                                               \
-  }
-#define ATMI_MEM_PLACE_GPU(node, gpu_id)                                       \
-  {                                                                            \
-    .node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id,           \
-    .mem_id = -1                                                               \
-  }
+
 #define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id)                       \
   {                                                                            \
     .node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id,           \
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -22,17 +22,7 @@
 namespace core {
 void allow_access_to_all_gpu_agents(void *ptr);
 
-const char *getPlaceStr(atmi_devtype_t type) {
-  switch (type) {
-  case ATMI_DEVTYPE_CPU:
-    return "CPU";
-  case ATMI_DEVTYPE_GPU:
-    return "GPU";
-  default:
-    return NULL;
-  }
-}
-
+namespace {
 ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) {
   int dev_id = place.dev_id;
   switch (place.dev_type) {
@@ -47,6 +37,7 @@
   ATLProcessor &proc = get_processor_by_mem_place(place);
   return get_memory_pool(proc, place.mem_id);
 }
+} // namespace
 
 void register_allocation(void *ptr, size_t size, atmi_mem_place_t place) {
   if (place.dev_type == ATMI_DEVTYPE_CPU)
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -222,17 +222,10 @@
 
 extern void register_allocation(void *addr, size_t size,
                                 atmi_mem_place_t place);
-extern hsa_amd_memory_pool_t
-get_memory_pool_by_mem_place(atmi_mem_place_t place);
+
 extern bool atl_is_atmi_initialized();
 
 bool handle_group_signal(hsa_signal_value_t value, void *arg);
 
-void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest);
-uint16_t
-create_header(hsa_packet_type_t type, int barrier,
-              atmi_task_fence_scope_t acq_fence = ATMI_FENCE_SCOPE_SYSTEM,
-              atmi_task_fence_scope_t rel_fence = ATMI_FENCE_SCOPE_SYSTEM);
-
 void allow_access_to_all_gpu_agents(void *ptr);
 } // namespace core
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -172,12 +172,12 @@
   return &g_atmi_machine;
 }
 
-void atl_set_atmi_initialized() {
+static void atl_set_atmi_initialized() {
   // FIXME: thread safe? locks?
   g_atmi_initialized = true;
 }
 
-void atl_reset_atmi_initialized() {
+static void atl_reset_atmi_initialized() {
   // FIXME: thread safe? locks?
   g_atmi_initialized = false;
 }
@@ -233,7 +233,7 @@
   return ATMI_STATUS_SUCCESS;
 }
 
-void atmi_init_context_structs() {
+static void atmi_init_context_structs() {
   atlc_p = &atlc;
   atlc.struct_initialized = true; /* This only gets called one time */
   atlc.g_hsa_initialized = false;
@@ -609,7 +609,7 @@
     return ATMI_STATUS_SUCCESS;
 }
 
-bool isImplicit(KernelArgMD::ValueKind value_kind) {
+static bool isImplicit(KernelArgMD::ValueKind value_kind) {
   switch (value_kind) {
   case KernelArgMD::ValueKind::HiddenGlobalOffsetX:
   case KernelArgMD::ValueKind::HiddenGlobalOffsetY:
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -72,9 +72,6 @@
 
 int print_kernel_trace;
 
-// Size of the target call stack struture
-uint32_t TgtStackItemSize = 0;
-
 #undef check // Drop definition from internal.h
 #ifdef OMPTARGET_DEBUG
 #define check(msg, status)                                                     \
@@ -275,21 +272,18 @@
 }
 
 namespace core {
+namespace {
 void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest) {
   __atomic_store_n(packet, header | (rest << 16), __ATOMIC_RELEASE);
 }
 
-uint16_t create_header(hsa_packet_type_t type, int barrier,
-                       atmi_task_fence_scope_t acq_fence,
-                       atmi_task_fence_scope_t rel_fence) {
-  uint16_t header = type << HSA_PACKET_HEADER_TYPE;
-  header |= barrier << HSA_PACKET_HEADER_BARRIER;
-  header |= (hsa_fence_scope_t) static_cast<int>(
-      acq_fence << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE);
-  header |= (hsa_fence_scope_t) static_cast<int>(
-      rel_fence << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
+uint16_t create_header() {
+  uint16_t header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
   return header;
 }
+} // namespace
 } // namespace core
 
 /// Class containing all the device information
@@ -1904,11 +1898,8 @@
       hsa_signal_store_relaxed(packet->completion_signal, 1);
     }
 
-    core::packet_store_release(
-        reinterpret_cast<uint32_t *>(packet),
-        core::create_header(HSA_PACKET_TYPE_KERNEL_DISPATCH, 0,
-                            ATMI_FENCE_SCOPE_SYSTEM, ATMI_FENCE_SCOPE_SYSTEM),
-        packet->setup);
+    core::packet_store_release(reinterpret_cast<uint32_t *>(packet),
+                               core::create_header(), packet->setup);
 
     hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);