Changeset View
Changeset View
Standalone View
Standalone View
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Show First 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size, | uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size, | ||||
uint16_t group_size) { | uint16_t group_size) { | ||||
uint32_t r = grid_size - group_id * group_size; | uint32_t r = grid_size - group_id * group_size; | ||||
return (r < group_size) ? r : group_size; | return (r < group_size) ? r : group_size; | ||||
} | } | ||||
} // namespace | } // namespace | ||||
EXTERN int GetNumberOfBlocksInKernel() { | EXTERN int __kmpc_get_hardware_num_blocks() { | ||||
return get_grid_dim(__builtin_amdgcn_grid_size_x(), | return get_grid_dim(__builtin_amdgcn_grid_size_x(), | ||||
__builtin_amdgcn_workgroup_size_x()); | __builtin_amdgcn_workgroup_size_x()); | ||||
} | } | ||||
EXTERN int GetNumberOfThreadsInBlock() { | EXTERN int __kmpc_get_hardware_num_threads_in_block() { | ||||
return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), | return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), | ||||
__builtin_amdgcn_grid_size_x(), | __builtin_amdgcn_grid_size_x(), | ||||
__builtin_amdgcn_workgroup_size_x()); | __builtin_amdgcn_workgroup_size_x()); | ||||
} | } | ||||
EXTERN unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; } | EXTERN unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; } | ||||
EXTERN unsigned GetWarpSize() { return WARPSIZE; } | EXTERN unsigned GetWarpSize() { return WARPSIZE; } | ||||
EXTERN unsigned GetLaneId() { | EXTERN unsigned GetLaneId() { | ||||
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); | return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); | ||||
} | } | ||||
EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { | EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { | ||||
return GetNumberOfThreadsInBlock(); | return __kmpc_get_hardware_num_threads_in_block(); | ||||
} | } | ||||
// Atomics | // Atomics | ||||
uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) { | uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) { | ||||
return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST); | return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST); | ||||
} | } | ||||
uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) { | uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) { | ||||
return __builtin_amdgcn_atomic_inc32(Address, Val, __ATOMIC_SEQ_CST, ""); | return __builtin_amdgcn_atomic_inc32(Address, Val, __ATOMIC_SEQ_CST, ""); | ||||
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines |