Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -4116,7 +4116,7 @@ if (CudaInstallation.isValid()) { addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath()); CC1Args.push_back("-include"); - CC1Args.push_back("cuda_runtime.h"); + CC1Args.push_back("__clang_cuda_runtime_wrapper.h"); } } Index: lib/Headers/CMakeLists.txt =================================================================== --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -15,9 +15,9 @@ avxintrin.h bmi2intrin.h bmiintrin.h + __clang_cuda_runtime_wrapper.h cpuid.h cuda_builtin_vars.h - cuda_runtime.h emmintrin.h f16cintrin.h float.h Index: lib/Headers/__clang_cuda_runtime_wrapper.h =================================================================== --- lib/Headers/__clang_cuda_runtime_wrapper.h +++ lib/Headers/__clang_cuda_runtime_wrapper.h @@ -1,4 +1,4 @@ -/*===---- cuda_runtime.h - CUDA runtime support ----------------------------=== +/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21,8 +21,24 @@ *===-----------------------------------------------------------------------=== */ -#ifndef __CLANG_CUDA_RUNTIME_H__ -#define __CLANG_CUDA_RUNTIME_H__ +/* + * WARNING: This header is intended to be directly -include'd by + * the compiler and is not supposed to be included by users. + * + * CUDA headers are implemented in a way that currently makes it + * impossible for user code to #include directly when compiling with + * Clang. They present a different view of CUDA-supplied functions + * depending on where in NVCC's compilation pipeline the headers are + * included. 
Neither of these modes provides function definitions with + * correct attributes, so we use the preprocessor to force the headers + * into a form that Clang can use. + * + * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's + * this file during every CUDA compilation. + */ + +#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__ +#define __CLANG_CUDA_RUNTIME_WRAPPER_H__ #if defined(__CUDA__) && defined(__clang__) @@ -35,9 +51,9 @@ #pragma push_macro("__THROW") #pragma push_macro("__CUDA_ARCH__") -// WARNING: Preprocessor hacks below are based on specific of -// implementation of CUDA-7.x headers and are expected to break with -// any other version of CUDA headers. +// WARNING: Preprocessor hacks below are based on specific details of +// CUDA-7.x headers and are not expected to work with any other +// version of CUDA headers. #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" @@ -76,12 +92,12 @@ #undef __CUDABE__ #define __CUDACC__ -#include_next "cuda_runtime.h" +#include "cuda_runtime.h" #undef __CUDACC__ #define __CUDABE__ -// CUDA headers use __nvvm_memcpy and __nvvm_memset which clang does +// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does // not have at the moment. Emulate them with a builtin memcpy/memset. #define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n) #define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n) @@ -176,4 +192,4 @@ #endif #endif // __CUDA__ -#endif // __CLANG_CUDA_RUNTIME_H__ +#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ Index: test/Driver/cuda-detect.cu =================================================================== --- test/Driver/cuda-detect.cu +++ test/Driver/cuda-detect.cu @@ -39,7 +39,7 @@ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE // Verify that we don't add include paths, link with libdevice or -// -include cuda_runtime without valid CUDA installation. 
+// -include __clang_cuda_runtime_wrapper.h without a valid CUDA installation. // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ @@ -59,6 +59,6 @@ // NOLIBDEVICE-NOT: "-target-feature" "+ptx42" // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include" // NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/cuda/include" -// CUDAINC-SAME: "-include" "cuda_runtime.h" -// NOCUDAINC-NOT: "-include" "cuda_runtime.h" +// CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h" +// NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h" // COMMON-SAME: "-x" "cuda"