diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h @@ -51,25 +51,6 @@ ATMI_MEMTYPE_ANY } atmi_memtype_t; -/** - * @brief ATMI Memory Fences for Tasks. - */ -typedef enum atmi_task_fence_scope_s { - /** - * No memory fence applied; external fences have to be applied around the task - * launch/completion. - */ - ATMI_FENCE_SCOPE_NONE = 0, - /** - * The fence is applied to the device. - */ - ATMI_FENCE_SCOPE_DEVICE = 1, - /** - * The fence is applied to the entire system. - */ - ATMI_FENCE_SCOPE_SYSTEM = 2 -} atmi_task_fence_scope_t; - /** @} */ /** \defgroup common Common ATMI Structures @@ -164,20 +145,9 @@ // Below are some helper macros that can be used to setup // some of the ATMI data structures. -#define ATMI_PLACE_CPU(node, cpu_id) \ - { .node_id = node, .type = ATMI_DEVTYPE_CPU, .device_id = cpu_id } #define ATMI_PLACE_GPU(node, gpu_id) \ { .node_id = node, .type = ATMI_DEVTYPE_GPU, .device_id = gpu_id } -#define ATMI_MEM_PLACE_CPU(node, cpu_id) \ - { \ - .node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \ - .mem_id = -1 \ - } -#define ATMI_MEM_PLACE_GPU(node, gpu_id) \ - { \ - .node_id = node, .dev_type = ATMI_DEVTYPE_GPU, .dev_id = gpu_id, \ - .mem_id = -1 \ - } + #define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id) \ { \ .node_id = node, .dev_type = ATMI_DEVTYPE_CPU, .dev_id = cpu_id, \ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -22,17 +22,7 @@ namespace core { void allow_access_to_all_gpu_agents(void *ptr); -const char *getPlaceStr(atmi_devtype_t type) { - switch (type) { - case ATMI_DEVTYPE_CPU: - return "CPU"; - case ATMI_DEVTYPE_GPU: - return "GPU"; - default: - return NULL; - } -} - +namespace { ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) { int dev_id = place.dev_id; switch (place.dev_type) { @@ -47,6 +37,7 @@ ATLProcessor &proc = get_processor_by_mem_place(place); return get_memory_pool(proc, place.mem_id); } +} // namespace void register_allocation(void *ptr, size_t size, atmi_mem_place_t place) { if (place.dev_type == ATMI_DEVTYPE_CPU) diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h --- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h @@ -222,17 +222,11 @@ extern void register_allocation(void *addr, size_t size, atmi_mem_place_t place); -extern hsa_amd_memory_pool_t -get_memory_pool_by_mem_place(atmi_mem_place_t place); + extern bool atl_is_atmi_initialized(); bool handle_group_signal(hsa_signal_value_t value, void *arg); -void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest); -uint16_t -create_header(hsa_packet_type_t type, int barrier, - atmi_task_fence_scope_t acq_fence = ATMI_FENCE_SCOPE_SYSTEM, - atmi_task_fence_scope_t rel_fence = ATMI_FENCE_SCOPE_SYSTEM); void allow_access_to_all_gpu_agents(void *ptr); } // namespace core diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -172,12 +172,12 @@ return &g_atmi_machine; } -void atl_set_atmi_initialized() { +static void atl_set_atmi_initialized() { // FIXME: thread safe? locks? g_atmi_initialized = true; } -void atl_reset_atmi_initialized() { +static void atl_reset_atmi_initialized() { // FIXME: thread safe? locks? g_atmi_initialized = false; } @@ -233,7 +233,7 @@ return ATMI_STATUS_SUCCESS; } -void atmi_init_context_structs() { +static void atmi_init_context_structs() { atlc_p = &atlc; atlc.struct_initialized = true; /* This only gets called one time */ atlc.g_hsa_initialized = false; @@ -609,7 +609,7 @@ return ATMI_STATUS_SUCCESS; } -bool isImplicit(KernelArgMD::ValueKind value_kind) { +static bool isImplicit(KernelArgMD::ValueKind value_kind) { switch (value_kind) { case KernelArgMD::ValueKind::HiddenGlobalOffsetX: case KernelArgMD::ValueKind::HiddenGlobalOffsetY: diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -72,9 +72,6 @@ int print_kernel_trace; -// Size of the target call stack struture -uint32_t TgtStackItemSize = 0; - #undef check // Drop definition from internal.h #ifdef OMPTARGET_DEBUG #define check(msg, status) \ @@ -275,21 +272,18 @@ } namespace core { +namespace { void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest) { __atomic_store_n(packet, header | (rest << 16), __ATOMIC_RELEASE); } -uint16_t create_header(hsa_packet_type_t type, int barrier, - atmi_task_fence_scope_t acq_fence, - atmi_task_fence_scope_t rel_fence) { - uint16_t header = type << HSA_PACKET_HEADER_TYPE; - header |= barrier << HSA_PACKET_HEADER_BARRIER; - header |= (hsa_fence_scope_t) static_cast( - acq_fence << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE); - header |= (hsa_fence_scope_t) static_cast( - rel_fence << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); +uint16_t create_header() { + uint16_t header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; return header; } +} // namespace } // namespace core /// Class containing all the device information @@ -1904,11 +1898,8 @@ hsa_signal_store_relaxed(packet->completion_signal, 1); } - core::packet_store_release( - reinterpret_cast(packet), - core::create_header(HSA_PACKET_TYPE_KERNEL_DISPATCH, 0, - ATMI_FENCE_SCOPE_SYSTEM, ATMI_FENCE_SCOPE_SYSTEM), - packet->setup); + core::packet_store_release(reinterpret_cast(packet), + core::create_header(), packet->setup); hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);