// Patch summary: adds CUDA 10.1 support to clang. It introduces
// CudaVersion::CUDA_101 (now LATEST) with its "10.1" string and 101 number
// mappings, adds CudaFeature::CUDA_USES_FATBIN_REGISTER_END (enabled for
// versions >= CUDA_101), emits a call to __cudaRegisterFatBinaryEnd(Handle)
// after the globals registration call when that feature is enabled, teaches
// the driver to recognize version 10.1, and raises the runtime wrapper's
// accepted CUDA_VERSION ceiling from 10000 to 10010.
// NOTE(review): RegisterFatbinEndFunc appears to be created outside the
// feature check that guards its only visible use -- confirm whether the
// declaration should move under that check.
Index: clang/include/clang/Basic/Cuda.h =================================================================== --- clang/include/clang/Basic/Cuda.h +++ clang/include/clang/Basic/Cuda.h @@ -25,7 +25,8 @@ CUDA_91, CUDA_92, CUDA_100, - LATEST = CUDA_100, + CUDA_101, + LATEST = CUDA_101, }; const char *CudaVersionToString(CudaVersion V); // Input is "Major.Minor" @@ -107,6 +108,8 @@ enum class CudaFeature { // CUDA-9.2+ uses a new API for launching kernels. CUDA_USES_NEW_LAUNCH, + // CUDA-10.1+ needs explicit end of GPU binary registration. + CUDA_USES_FATBIN_REGISTER_END, }; bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature); Index: clang/lib/Basic/Cuda.cpp =================================================================== --- clang/lib/Basic/Cuda.cpp +++ clang/lib/Basic/Cuda.cpp @@ -25,6 +25,8 @@ return "9.2"; case CudaVersion::CUDA_100: return "10.0"; + case CudaVersion::CUDA_101: + return "10.1"; } llvm_unreachable("invalid enum"); } @@ -37,7 +39,8 @@ .Case("9.0", CudaVersion::CUDA_90) .Case("9.1", CudaVersion::CUDA_91) .Case("9.2", CudaVersion::CUDA_92) - .Case("10.0", CudaVersion::CUDA_100); + .Case("10.0", CudaVersion::CUDA_100) + .Case("10.1", CudaVersion::CUDA_101); } const char *CudaArchToString(CudaArch A) { @@ -352,6 +355,8 @@ return CudaVersion::CUDA_92; case 100: return CudaVersion::CUDA_100; + case 101: + return CudaVersion::CUDA_101; default: return CudaVersion::UNKNOWN; } @@ -365,6 +370,8 @@ switch (Feature) { case CudaFeature::CUDA_USES_NEW_LAUNCH: return Version >= CudaVersion::CUDA_92; + case CudaFeature::CUDA_USES_FATBIN_REGISTER_END: + return Version >= CudaVersion::CUDA_101; } llvm_unreachable("Unknown CUDA feature."); } Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -445,6 +445,10 @@ llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), 
addUnderscoredPrefixToName("RegisterFatBinary")); + // void __cudaRegisterFatBinaryEnd(void **); + llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), + "__cudaRegisterFatBinaryEnd"); // struct { int magic, int version, void * gpu_binary, void * dont_care }; llvm::StructType *FatbinWrapperTy = llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy); @@ -616,6 +620,11 @@ // Call __cuda_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + + // CUDA 10.1+ requires calling __cudaRegisterFatBinaryEnd(Handle); + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) + CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall); } else { // Generate a unique module ID. SmallString<64> ModuleID; Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -60,6 +60,8 @@ return CudaVersion::CUDA_92; if (Major == 10 && Minor == 0) return CudaVersion::CUDA_100; + if (Major == 10 && Minor == 1) + return CudaVersion::CUDA_101; return CudaVersion::UNKNOWN; } Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h =================================================================== --- clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -62,7 +62,7 @@ #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" -#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000 +#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010 #error "Unsupported CUDA version!" #endif