Make the default GPU architecture for OpenMP offloading to Nvidia GPUs a
build-time setting.

  * CMakeLists.txt: add the cache variable CLANG_OPENMP_NVPTX_DEFAULT_ARCH
    (default "sm_30"); values not matching ^sm_[0-9]+$ are reset to sm_30
    with a warning.
  * include/clang/Config/config.h.cmake: export the variable as a macro.
  * lib/Driver/ToolChains/Cuda.h: remove getLowestExistingArch().
  * lib/Driver/ToolChains/Cuda.cpp: no longer skip a CUDA installation just
    because no libdevice file was found, and use
    CLANG_OPENMP_NVPTX_DEFAULT_ARCH as the -march value when none was given.

NOTE(review): leading indentation inside the hunks was reconstructed; if a
hunk is rejected, retry with whitespace-insensitive matching (patch -l).

Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -235,6 +235,16 @@
 set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
   "Default OpenMP runtime used by -fopenmp.")
 
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+  "Default architecture for OpenMP offloading to Nvidia GPUs.")
+if (NOT("${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}" MATCHES "^sm_[0-9]+$"))
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
 set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
   "Vendor-specific text for showing with version information.")
 
Index: include/clang/Config/config.h.cmake
===================================================================
--- include/clang/Config/config.h.cmake
+++ include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
 /* Default OpenMP runtime used by -fopenmp. */
 #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
 
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
 /* Multilib suffix for libdir. */
 #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
 
Index: lib/Driver/ToolChains/Cuda.h
===================================================================
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
   std::string getLibDeviceFile(StringRef Gpu) const {
     return LibDeviceMap.lookup(Gpu);
   }
-  /// \brief Get lowest available compute capability
-  /// for which a libdevice library exists.
-  std::string getLowestExistingArch() const {
-    std::string LibDeviceFile;
-    for (auto key : LibDeviceMap.keys()) {
-      LibDeviceFile = LibDeviceMap.lookup(key);
-      if (!LibDeviceFile.empty())
-        return key;
-    }
-    return "sm_20";
-  }
 };
 
 namespace tools {
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -167,19 +167,6 @@
       }
     }
 
-    // This code prevents IsValid from being set when
-    // no libdevice has been found.
-    bool allEmpty = true;
-    std::string LibDeviceFile;
-    for (auto key : LibDeviceMap.keys()) {
-      LibDeviceFile = LibDeviceMap.lookup(key);
-      if (!LibDeviceFile.empty())
-        allEmpty = false;
-    }
-
-    if (allEmpty)
-      continue;
-
     IsValid = true;
     break;
   }
@@ -565,12 +552,8 @@
 
     StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
     if (Arch.empty()) {
-      // Default compute capability for CUDA toolchain is the
-      // lowest compute capability supported by the installed
-      // CUDA version.
-      DAL->AddJoinedArg(nullptr,
-          Opts.getOption(options::OPT_march_EQ),
-          CudaInstallation.getLowestExistingArch());
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
     }
 
     return DAL;