diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h --- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h +++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h @@ -56,7 +56,8 @@ } hsa_device_type_t; typedef enum { - HSA_ISA_INFO_NAME = 1, + HSA_ISA_INFO_NAME_LENGTH = 0, + HSA_ISA_INFO_NAME = 1 } hsa_isa_info_t; typedef enum { diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -466,6 +467,7 @@ std::vector ThreadsPerGroup; std::vector WarpSize; std::vector GPUName; + std::vector TargetID; // OpenMP properties std::vector NumTeams; @@ -1901,11 +1903,145 @@ } } // namespace core +static hsa_status_t get_isa_info(hsa_isa_t isa, void *data) { + hsa_status_t err; + uint32_t name_len; + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len); + if (err != HSA_STATUS_SUCCESS) { + DP("Error getting ISA info length\n"); + return err; + } + + char target_id[name_len]; + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, target_id); + if (err != HSA_STATUS_SUCCESS) { + DP("Error getting ISA info name\n"); + return err; + } + + auto TripleTargetID = llvm::StringRef(target_id); + if (TripleTargetID.consume_front_insensitive("amdgcn-amd-amdhsa")) { + DeviceInfo.TargetID.push_back(TripleTargetID.ltrim('-').str()); + } + return HSA_STATUS_SUCCESS; +} + +llvm::StringRef parseTargetID(llvm::StringRef target_id, + std::map *map) { + auto arch_feature = target_id.split(":"); + auto arch = arch_feature.first; + auto features = arch_feature.second; + + if (features.contains_insensitive("sramecc+")) { + map->insert(std::pair("sramecc", true)); + } else if (features.contains_insensitive("sramecc-")) { + map->insert(std::pair("sramecc", false)); + } + if (features.contains_insensitive("xnack+")) { + map->insert(std::pair("xnack", true)); + } else if (features.contains_insensitive("xnack-")) { + map->insert(std::pair("xnack", false)); + } + + return arch; +} + +/// Checks if an iamge \p img_info is compatible with current +/// system's environment \p env_info +bool is_image_compatible_with_env(char *img_info, std::string env_info) { + llvm::StringRef img_tid(img_info), env_tid(env_info); + + // Compatible in case of exact match + if (img_tid == env_tid) { + DP("Compatible: Exact match \t[Image: %s]\t:\t[Environment: %s]\n", + img_tid.data(), env_tid.data()); + return true; + } + + // Incompatible if Archs mismatch. + std::map img_map, env_map; + llvm::StringRef img_arch = parseTargetID(img_tid, &img_map); + llvm::StringRef env_arch = parseTargetID(env_tid, &env_map); + + // Both env_arch and img_arch can't be empty here. + if (env_arch.empty() || img_arch.empty() || !img_arch.contains(env_arch)) { + DP("Incompatible: Processor mismatch \t[Image: %s]\t:\t[Environment: %s]\n", + img_tid.data(), env_tid.data()); + return false; + } + + // Incompatible if image has more features than the environment, irrespective + // of type or sign of features. + if (img_map.size() > env_map.size()) { + DP("Incompatible: Image has more features than the environment \t[Image: " + "%s]\t:\t[Environment: %s]\n", + img_tid.data(), env_tid.data()); + return false; + } + + // Compatible if each target feature specified by the environment is + // compatible with target feature of the image. The target feature is + // compatible if the iamge does not specify it (meaning Any), or if it + // specifies it with the same value (meaning On or Off). + for (const auto &img_feature : img_map) { + auto env_feature = env_map.find(img_feature.first); + if (env_feature == env_map.end()) { + DP("Incompatible: Value of Image's non-ANY feature is not matching with " + "the Environment feature's ANY value \t[Image: %s]\t:\t[Environment: " + "%s]\n", + img_tid.data(), env_tid.data()); + return false; + } else if (env_feature->first == img_feature.first && + env_feature->second != img_feature.second) { + DP("Incompatible: Value of Image's non-ANY feature is not matching with " + "the Environment feature's non-ANY value \t[Image: " + "%s]\t:\t[Environment: %s]\n", + img_tid.data(), env_tid.data()); + return false; + } + } + + // Image is compatible if all features of Environment are: + // - either, present in the Image's features map with the same sign, + // - or, the feature is missing from Image's features map i.e. it is + // set to ANY + DP("Compatible: Target IDs are compatible \t[Image: %s]\t:\t[Environment: " + "%s]\n", + img_tid.data(), env_tid.data()); + return true; +} + extern "C" { int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { return elf_machine_id_is_amdgcn(image); } +int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image, + __tgt_image_info *info) { + if (!__tgt_rtl_is_valid_binary(image)) + return false; + + // A subarchitecture was not specified. Assume it is compatible. + if (!info->Arch) + return true; + + int32_t NumberOfDevices = __tgt_rtl_number_of_devices(); + + for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) { + __tgt_rtl_init_device(DeviceId); + hsa_agent_t agent = DeviceInfo.HSAAgents[DeviceId]; + hsa_status_t err = hsa_agent_iterate_isas(agent, get_isa_info, &DeviceId); + if (err != HSA_STATUS_SUCCESS) { + DP("Error iterating ISAs\n"); + return false; + } + if (is_image_compatible_with_env(info->Arch, DeviceInfo.TargetID[DeviceId])) + return false; + } + DP("Image has compatible compute capability: %s\n", info->Arch); + return true; +} + int __tgt_rtl_number_of_devices() { // If the construction failed, no methods are safe to call if (DeviceInfo.ConstructionSucceeded) { diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -1,6 +1,7 @@ VERS1.0 { global: __tgt_rtl_is_valid_binary; + __tgt_rtl_is_valid_binary_info; __tgt_rtl_is_data_exchangable; __tgt_rtl_number_of_devices; __tgt_rtl_init_requires;