diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1484,7 +1484,81 @@
 #endif
 
+/// Report whether \p image can be used by at least one CUDA device.
+///
+/// The image must first pass elf_check_machine() for EM_CUDA (190). If it
+/// names no subarchitecture it is assumed to be compatible. Otherwise its
+/// name is compared against the "sm_<major><minor>" compute capability of
+/// each device reported by cuDeviceGetCount(): an exact name match is
+/// always accepted, and a purely numeric "sm_XY" image is also accepted on
+/// a device with the same major revision and the same or a higher minor
+/// revision.
+///
+/// \returns non-zero if the image is usable; zero if it is not or if a CUDA
+/// driver query fails.
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *image) {
-  return elf_check_machine(image, /* EM_CUDA */ 190);
+  if (!elf_check_machine(image, /* EM_CUDA */ 190))
+    return false;
+
+  // A subarchitecture was not specified. Assume it is compatible.
+  if (!image->Info.Arch)
+    return true;
+
+  const std::string ImageArch = image->Info.Arch;
+  DP("The binary's compute capability is %s\n", ImageArch.c_str());
+
+  // Decode "sm_<major><minor>"; the last digit is the minor revision. Names
+  // with a non-numeric suffix, or that do not follow this scheme, stay
+  // undecoded (-1) and can only be matched exactly.
+  const std::string Prefix = "sm_";
+  const size_t NumDigits =
+      ImageArch.size() > Prefix.size() ? ImageArch.size() - Prefix.size() : 0;
+  int32_t ImageMajor = -1;
+  int32_t ImageMinor = -1;
+  // At most four digits so the accumulation below cannot overflow.
+  if (NumDigits >= 2 && NumDigits <= 4 &&
+      ImageArch.compare(0, Prefix.size(), Prefix) == 0 &&
+      ImageArch.find_first_not_of("0123456789", Prefix.size()) ==
+          std::string::npos) {
+    int32_t Capability = 0;
+    for (size_t I = Prefix.size(); I < ImageArch.size(); ++I)
+      Capability = Capability * 10 + (ImageArch[I] - '0');
+    ImageMajor = Capability / 10;
+    ImageMinor = Capability % 10;
+  }
+
+  int32_t NumberOfDevices = 0;
+  if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS)
+    return false;
+
+  for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) {
+    CUdevice Device;
+    if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS)
+      return false;
+
+    int32_t Major = 0;
+    int32_t Minor = 0;
+    if (cuDeviceGetAttribute(&Major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+    if (cuDeviceGetAttribute(&Minor,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+
+    std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor);
+    DP("Device %d has compute capability %s\n", DeviceId, ArchStr.c_str());
+    if (ArchStr == ImageArch)
+      return true;
+
+    // A cubin built for compute capability X.y also runs on a device of
+    // capability X.z when z >= y.
+    // NOTE(review): NVIDIA documents this guarantee for desktop GPUs only,
+    // not Tegra; confirm that is acceptable for the supported targets.
+    if (Major == ImageMajor && Minor >= ImageMinor)
+      return true;
+  }
+  return false;
 }
 
 int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); }