Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1043,10 +1043,10 @@
   return packet_id;
 }
 
-int32_t __tgt_rtl_run_target_team_region_locked(
-    int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
-    ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams,
-    int32_t thread_limit, uint64_t loop_tripcount) {
+int32_t runRegionNowaitLocked(int32_t device_id, void *tgt_entry_ptr,
+                              void **tgt_args, ptrdiff_t *tgt_offsets,
+                              int32_t arg_num, int32_t num_teams,
+                              int32_t thread_limit, uint64_t loop_tripcount) {
   // Set the context we are using
   // update thread limit content in gpu memory if un-initialized or specified
   // from host
@@ -2219,9 +2219,9 @@
                                          uint64_t loop_tripcount) {
 
   DeviceInfo.load_run_lock.lock_shared();
-  int32_t res = __tgt_rtl_run_target_team_region_locked(
-      device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,
-      thread_limit, loop_tripcount);
+  int32_t res =
+      runRegionNowaitLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets,
+                            arg_num, num_teams, thread_limit, loop_tripcount);
 
   DeviceInfo.load_run_lock.unlock_shared();
   return res;
@@ -2239,6 +2239,20 @@
                                           thread_limit, 0);
 }
 
+int32_t __tgt_rtl_run_target_team_region_async(
+    int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
+    ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams,
+    int32_t thread_limit, uint64_t loop_tripcount) {
+
+  DeviceInfo.load_run_lock.lock_shared();
+  int32_t res =
+      runRegionNowaitLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets,
+                            arg_num, num_teams, thread_limit, loop_tripcount);
+
+  DeviceInfo.load_run_lock.unlock_shared();
+  return res;
+}
+
 int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
                                           void *tgt_entry_ptr, void **tgt_args,
                                           ptrdiff_t *tgt_offsets,
@@ -2251,9 +2265,9 @@
   // fix thread num
   int32_t team_num = 1;
   int32_t thread_limit = 0; // use default
-  return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
-                                          tgt_offsets, arg_num, team_num,
-                                          thread_limit, 0);
+  return __tgt_rtl_run_target_team_region_async(device_id, tgt_entry_ptr,
+                                                tgt_args, tgt_offsets, arg_num,
+                                                team_num, thread_limit, 0);
 }
 
 int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {