diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
--- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
+++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
@@ -142,6 +142,8 @@
                               void *data, uint32_t private_segment_size,
                               uint32_t group_segment_size, hsa_queue_t **queue);
 
+hsa_status_t hsa_queue_destroy(hsa_queue_t *queue);
+
 uint64_t hsa_queue_load_read_index_scacquire(const hsa_queue_t *queue);
 
 uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue,
diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
--- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
@@ -29,6 +29,7 @@
 DLWRAP(hsa_signal_store_screlease, 2);
 DLWRAP(hsa_signal_wait_scacquire, 5);
 DLWRAP(hsa_queue_create, 8);
+DLWRAP(hsa_queue_destroy, 1);
 DLWRAP(hsa_queue_load_read_index_scacquire, 1);
 DLWRAP(hsa_queue_add_write_index_relaxed, 2);
 DLWRAP(hsa_memory_copy, 3);
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -451,6 +451,17 @@
   HSALifetime HSA; // First field => constructed first and destructed last
   std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
 
+  struct QueueDeleter {
+    void operator()(hsa_queue_t *Q) {
+      if (Q) {
+        hsa_status_t Err = hsa_queue_destroy(Q);
+        if (Err != HSA_STATUS_SUCCESS) {
+          DP("Error destroying hsa queue: %s\n", get_error_string(Err));
+        }
+      }
+    }
+  };
+
 public:
   // load binary populates symbol tables and mutates various global state
   // run uses those symbol tables
@@ -460,7 +471,8 @@
 
   // GPU devices
   std::vector<hsa_agent_t> HSAAgents;
-  std::vector<hsa_queue_t *> HSAQueues; // one per gpu
+  std::vector<std::unique_ptr<hsa_queue_t, QueueDeleter>>
+      HSAQueues; // one per gpu
 
   // CPUs
   std::vector<hsa_agent_t> CPUAgents;
@@ -773,10 +785,6 @@
       return;
     }
 
-    for (int i = 0; i < NumberOfDevices; i++) {
-      HSAQueues[i] = nullptr;
-    }
-
     for (int i = 0; i < NumberOfDevices; i++) {
       uint32_t queue_size = 0;
       {
@@ -792,12 +800,16 @@
         }
       }
 
-      hsa_status_t rc = hsa_queue_create(
-          HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI, callbackQueue, NULL,
-          UINT32_MAX, UINT32_MAX, &HSAQueues[i]);
-      if (rc != HSA_STATUS_SUCCESS) {
-        DP("Failed to create HSA queue %d\n", i);
-        return;
+      {
+        hsa_queue_t *Q = nullptr;
+        hsa_status_t rc =
+            hsa_queue_create(HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI,
+                             callbackQueue, NULL, UINT32_MAX, UINT32_MAX, &Q);
+        if (rc != HSA_STATUS_SUCCESS) {
+          DP("Failed to create HSA queue %d\n", i);
+          return;
+        }
+        HSAQueues[i].reset(Q);
       }
 
       deviceStateStore[i] = {nullptr, 0};
@@ -2149,7 +2161,7 @@
 
   // Run on the device.
   {
-    hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id];
+    hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id].get();
     if (!queue) {
       return OFFLOAD_FAIL;
     }