diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -327,10 +327,9 @@ // Number of initial streams for each device. int NumInitialStreams = 32; - static constexpr const int HardTeamLimit = 1U << 16U; // 64k - static constexpr const int HardThreadLimit = 1024; - static constexpr const int DefaultNumTeams = 128; - static constexpr const int DefaultNumThreads = 128; + static constexpr const int32_t HardThreadLimit = 1024; + static constexpr const int32_t DefaultNumTeams = 128; + static constexpr const int32_t DefaultNumThreads = 128; using StreamPoolTy = ResourcePoolTy; std::vector> StreamPool; @@ -651,14 +650,9 @@ DP("Error getting max grid dimension, use default value %d\n", DeviceRTLTy::DefaultNumTeams); DeviceData[DeviceId].BlocksPerGrid = DeviceRTLTy::DefaultNumTeams; - } else if (MaxGridDimX <= DeviceRTLTy::HardTeamLimit) { + } else { DP("Using %d CUDA blocks per grid\n", MaxGridDimX); DeviceData[DeviceId].BlocksPerGrid = MaxGridDimX; - } else { - DP("Max CUDA blocks per grid %d exceeds the hard team limit %d, capping " - "at the hard limit\n", - MaxGridDimX, DeviceRTLTy::HardTeamLimit); - DeviceData[DeviceId].BlocksPerGrid = DeviceRTLTy::HardTeamLimit; } // We are only exploiting threads along the x axis.