diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringRef.h" + #include #include #include @@ -33,6 +35,8 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" +using namespace llvm; + // Utility for retrieving and printing CUDA error string. #ifdef OMPTARGET_DEBUG #define CUDA_ERR_STRING(err) \ @@ -1529,13 +1533,15 @@ return false; // A subarchitecture was not specified. Assume it is compatible. - if (!info->Arch) + if (!info || !info->Arch) return true; int32_t NumberOfDevices = 0; if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS) return false; + // sizeof counts the trailing NUL; subtract 1 to drop only the "sm_" prefix. + StringRef ArchStr = StringRef(info->Arch).drop_front(sizeof("sm_") - 1); for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) { CUdevice Device; if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS) @@ -1551,8 +1557,12 @@ Device) != CUDA_SUCCESS) return false; - std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor); - if (ArchStr != info->Arch) + // A cubin generated for a certain compute capability is supported to run on + // any GPU with the same major revision and same or higher minor revision of + // compute capability. + int32_t ImageMajor = ArchStr[0] - '0'; + int32_t ImageMinor = ArchStr[1] - '0'; + if (Major != ImageMajor || Minor < ImageMinor) return false; }