diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1104,18 +1104,16 @@ DeviceInfo.GroupsPerDevice[device_id]); } - // Adjust threads to the env variables - if (DeviceInfo.EnvTeamThreadLimit > 0 && - (enforce_upper_bound(&DeviceInfo.NumThreads[device_id], - DeviceInfo.EnvTeamThreadLimit))) { - DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n", + if (DeviceInfo.EnvTeamThreadLimit > 0) { + DeviceInfo.NumThreads[device_id] = DeviceInfo.EnvTeamThreadLimit; + DP("Number of threads set according to OMP_TEAMS_THREAD_LIMIT=%d\n", DeviceInfo.EnvTeamThreadLimit); + } else { + DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size; + DP("Number of threads set according to library's default %d\n", + RTLDeviceInfoTy::Default_WG_Size); } - // Set default number of threads - DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size; - DP("Default number of threads set according to library's default %d\n", - RTLDeviceInfoTy::Default_WG_Size); if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id], DeviceInfo.ThreadsPerGroup[device_id])) { DP("Default number of threads exceeds device limit, capping at %d\n", @@ -2100,7 +2098,7 @@ */ int num_groups = 0; - int threadsPerGroup = RTLDeviceInfoTy::Default_WG_Size; + int threadsPerGroup = DeviceInfo.NumThreads[KernelInfo->device_id]; getLaunchVals(threadsPerGroup, num_groups, KernelInfo->ConstWGSize, KernelInfo->ExecutionMode, DeviceInfo.EnvTeamLimit,