Diff 392486

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Show First 20 Lines • Show All 1,037 Lines • ▼ Show 20 Lines	static uint64_t acquire_available_packet_id(hsa_queue_t *queue) {
bool full = true;		bool full = true;
while (full) {		while (full) {
full =		full =
packet_id >= (queue->size + hsa_queue_load_read_index_scacquire(queue));		packet_id >= (queue->size + hsa_queue_load_read_index_scacquire(queue));
}		}
return packet_id;		return packet_id;
}		}

int32_t __tgt_rtl_run_target_team_region_locked(		int32_t runRegionNowaitLocked(
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -int32_t runRegionNowaitLocked( - int32_t device_id, void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams, - int32_t thread_limit, uint64_t loop_tripcount) { +int32_t runRegionNowaitLocked(int32_t device_id, void *tgt_entry_ptr, + void **tgt_args, ptrdiff_t *tgt_offsets, + int32_t arg_num, int32_t num_teams, + int32_t thread_limit, uint64_t loop_tripcount) { Lint: Pre-merge checks: clang-format: please reformat the code ``` -int32_t runRegionNowaitLocked( - int32_t…
int32_t device_id, void *tgt_entry_ptr, void **tgt_args,		int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams,		ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams,
int32_t thread_limit, uint64_t loop_tripcount) {		int32_t thread_limit, uint64_t loop_tripcount) {
// Set the context we are using		// Set the context we are using
// update thread limit content in gpu memory if un-initialized or specified		// update thread limit content in gpu memory if un-initialized or specified
// from host		// from host

DP("Run target team region thread_limit %d\n", thread_limit);		DP("Run target team region thread_limit %d\n", thread_limit);
▲ Show 20 Lines • Show All 1,159 Lines • ▼ Show 20 Lines
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,		int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
void **tgt_args,		void **tgt_args,
ptrdiff_t *tgt_offsets,		ptrdiff_t *tgt_offsets,
int32_t arg_num, int32_t num_teams,		int32_t arg_num, int32_t num_teams,
int32_t thread_limit,		int32_t thread_limit,
uint64_t loop_tripcount) {		uint64_t loop_tripcount) {

DeviceInfo.load_run_lock.lock_shared();		DeviceInfo.load_run_lock.lock_shared();
int32_t res = __tgt_rtl_run_target_team_region_locked(		int32_t res = runRegionNowaitLocked(
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - int32_t res = runRegionNowaitLocked( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams, - thread_limit, loop_tripcount); + int32_t res = + runRegionNowaitLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets, + arg_num, num_teams, thread_limit, loop_tripcount); Lint: Pre-merge checks: clang-format: please reformat the code ``` - int32_t res = runRegionNowaitLocked…
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,		device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,
thread_limit, loop_tripcount);		thread_limit, loop_tripcount);

DeviceInfo.load_run_lock.unlock_shared();		DeviceInfo.load_run_lock.unlock_shared();
return res;		return res;
}		}

int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,		int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
void **tgt_args, ptrdiff_t *tgt_offsets,		void **tgt_args, ptrdiff_t *tgt_offsets,
int32_t arg_num) {		int32_t arg_num) {
// use one team and one thread		// use one team and one thread
// fix thread num		// fix thread num
int32_t team_num = 1;		int32_t team_num = 1;
int32_t thread_limit = 0; // use default		int32_t thread_limit = 0; // use default
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,		return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
tgt_offsets, arg_num, team_num,		tgt_offsets, arg_num, team_num,
thread_limit, 0);		thread_limit, 0);
}		}

		int32_t __tgt_rtl_run_target_team_region_async(int32_t device_id, void *tgt_entry_ptr,
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -int32_t __tgt_rtl_run_target_team_region_async(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, - ptrdiff_t *tgt_offsets, - int32_t arg_num, int32_t num_teams, - int32_t thread_limit, - uint64_t loop_tripcount) { +int32_t __tgt_rtl_run_target_team_region_async( + int32_t device_id, void *tgt_entry_ptr, void **tgt_args, + ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams, + int32_t thread_limit, uint64_t loop_tripcount) { Lint: Pre-merge checks: clang-format: please reformat the code ``` -int32_t __tgt_rtl_run_target_team_region_async…
		void **tgt_args,
		ptrdiff_t *tgt_offsets,
		int32_t arg_num, int32_t num_teams,
		int32_t thread_limit,
		uint64_t loop_tripcount) {

		DeviceInfo.load_run_lock.lock_shared();
		JonChesterfield Unsubmitted Done Reply Inline Actions Naming seems wrong — the NowaitLocked function does wait, and if it didn't, we'd be missing a wait somewhere. The diff was created without context, making it difficult to say anything further. 'Context not available.' JonChesterfield: Naming seems wrong — the NowaitLocked function does wait, and if it didn't, we'd be missing a…
		int32_t res = runRegionNowaitLocked(
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - int32_t res = runRegionNowaitLocked( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams, - thread_limit, loop_tripcount); + int32_t res = + runRegionNowaitLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets, + arg_num, num_teams, thread_limit, loop_tripcount); Lint: Pre-merge checks: clang-format: please reformat the code ``` - int32_t res = runRegionNowaitLocked…
		device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, num_teams,
		thread_limit, loop_tripcount);

		DeviceInfo.load_run_lock.unlock_shared();
		return res;
		}

int32_t __tgt_rtl_run_target_region_async(int32_t device_id,		int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
void *tgt_entry_ptr, void **tgt_args,		void *tgt_entry_ptr, void **tgt_args,
ptrdiff_t *tgt_offsets,		ptrdiff_t *tgt_offsets,
int32_t arg_num,		int32_t arg_num,
__tgt_async_info *AsyncInfo) {		__tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");		assert(AsyncInfo && "AsyncInfo is nullptr");
initAsyncInfo(AsyncInfo);		initAsyncInfo(AsyncInfo);

// use one team and one thread		// use one team and one thread
// fix thread num		// fix thread num
int32_t team_num = 1;		int32_t team_num = 1;
int32_t thread_limit = 0; // use default		int32_t thread_limit = 0; // use default
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,		return __tgt_rtl_run_target_team_region_async(device_id, tgt_entry_ptr, tgt_args,
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - return __tgt_rtl_run_target_team_region_async(device_id, tgt_entry_ptr, tgt_args, - tgt_offsets, arg_num, team_num, - thread_limit, 0); + return __tgt_rtl_run_target_team_region_async(device_id, tgt_entry_ptr, + tgt_args, tgt_offsets, arg_num, + team_num, thread_limit, 0); Lint: Pre-merge checks: clang-format: please reformat the code ``` - return __tgt_rtl_run_target_team_region_async…
tgt_offsets, arg_num, team_num,		tgt_offsets, arg_num, team_num,
thread_limit, 0);		thread_limit, 0);
}		}

int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {		int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");		assert(AsyncInfo && "AsyncInfo is nullptr");

// Cuda asserts that AsyncInfo->Queue is non-null, but this invariant		// Cuda asserts that AsyncInfo->Queue is non-null, but this invariant
Show All 15 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][OpenMP] Prepare amdgpu plugin for asynchronous implementation of target region launch
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 392486

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][OpenMP] Prepare amdgpu plugin for asynchronous implementation of target region launchClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 392486

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

[NFC][OpenMP] Prepare amdgpu plugin for asynchronous implementation of target region launch
ClosedPublic