Patch summary (free text ahead of the first header; patch(1) skips it):
  1) rename __tgt_rtl_run_target_team_region_locked to runRegionNowaitLocked
     and update the call site shown in the second hunk;
  2) add __tgt_rtl_run_target_team_region_async, which holds
     DeviceInfo.load_run_lock in shared mode around runRegionNowaitLocked;
  3) make __tgt_rtl_run_target_region_async call the new _async entry point.
Indentation inside the hunks was reconstructed from a whitespace-collapsed
copy of this patch; if context lines fail to match, apply with `patch -l`.

Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1043,7 +1043,7 @@
   return packet_id;
 }
 
-int32_t __tgt_rtl_run_target_team_region_locked(
+int32_t runRegionNowaitLocked(
     int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
     ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams,
     int32_t thread_limit, uint64_t loop_tripcount) {
@@ -2219,7 +2219,7 @@
                                          uint64_t loop_tripcount) {
 
   DeviceInfo.load_run_lock.lock_shared();
-  int32_t res = __tgt_rtl_run_target_team_region_locked(
+  int32_t res = runRegionNowaitLocked(
       device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,
       thread_limit, loop_tripcount);
 
@@ -2239,6 +2239,22 @@
                                           thread_limit, 0);
 }
 
+int32_t __tgt_rtl_run_target_team_region_async(int32_t device_id, void *tgt_entry_ptr,
+                                         void **tgt_args,
+                                         ptrdiff_t *tgt_offsets,
+                                         int32_t arg_num, int32_t num_teams,
+                                         int32_t thread_limit,
+                                         uint64_t loop_tripcount) {
+
+  DeviceInfo.load_run_lock.lock_shared();
+  int32_t res = runRegionNowaitLocked(
+      device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,
+      thread_limit, loop_tripcount);
+
+  DeviceInfo.load_run_lock.unlock_shared();
+  return res;
+}
+
 int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
                                           void *tgt_entry_ptr, void **tgt_args,
                                           ptrdiff_t *tgt_offsets,
@@ -2251,7 +2267,7 @@
   // fix thread num
   int32_t team_num = 1;
   int32_t thread_limit = 0; // use default
-  return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
+  return __tgt_rtl_run_target_team_region_async(device_id, tgt_entry_ptr, tgt_args,
                                           tgt_offsets, arg_num, team_num,
                                           thread_limit, 0);
 }