diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1519,6 +1519,54 @@
   return elf_check_machine(Image, /* EM_CUDA */ 190);
 }
 
+/// Check whether \p image can be used by this plugin, taking the optional
+/// subarchitecture recorded in \p info into account.
+///
+/// \returns non-zero (true) if \p image passes __tgt_rtl_is_valid_binary and
+/// either no subarchitecture was requested (\p info or info->Arch is null) or
+/// every device reported by the driver has a compute capability string
+/// "sm_<major><minor>" equal to info->Arch; zero (false) if the image is
+/// rejected, a driver query fails, or any device's capability differs.
+int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
+                                       __tgt_image_info *info) {
+  if (!__tgt_rtl_is_valid_binary(image))
+    return false;
+
+  // A subarchitecture was not specified. Assume it is compatible. `info`
+  // itself is checked first so a missing info struct is not dereferenced.
+  if (!info || !info->Arch)
+    return true;
+
+  int32_t NumberOfDevices = 0;
+  if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS)
+    return false;
+
+  // A single mismatching device or failed query rejects the image.
+  for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) {
+    CUdevice Device;
+    if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS)
+      return false;
+
+    int32_t Major, Minor;
+    if (cuDeviceGetAttribute(&Major,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+    if (cuDeviceGetAttribute(&Minor,
+                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                             Device) != CUDA_SUCCESS)
+      return false;
+
+    // Major 8 and minor 6 are spelled "sm_86".
+    std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor);
+    if (ArchStr != info->Arch)
+      return false;
+  }
+
+  DP("Image has compatible compute capability: %s\n", info->Arch);
+  return true;
+}
+
 int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); }
 
 int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -1,6 +1,7 @@
 VERS1.0 {
   global:
     __tgt_rtl_is_valid_binary;
+    __tgt_rtl_is_valid_binary_info;
     __tgt_rtl_is_data_exchangable;
     __tgt_rtl_number_of_devices;
     __tgt_rtl_init_requires;