Index: include/clang/Basic/Specifiers.h =================================================================== --- include/clang/Basic/Specifiers.h +++ include/clang/Basic/Specifiers.h @@ -231,23 +231,24 @@ /// \brief CallingConv - Specifies the calling convention that a function uses. enum CallingConv { - CC_C, // __attribute__((cdecl)) - CC_X86StdCall, // __attribute__((stdcall)) - CC_X86FastCall, // __attribute__((fastcall)) - CC_X86ThisCall, // __attribute__((thiscall)) + CC_C, // __attribute__((cdecl)) + CC_X86StdCall, // __attribute__((stdcall)) + CC_X86FastCall, // __attribute__((fastcall)) + CC_X86ThisCall, // __attribute__((thiscall)) CC_X86VectorCall, // __attribute__((vectorcall)) - CC_X86Pascal, // __attribute__((pascal)) - CC_Win64, // __attribute__((ms_abi)) - CC_X86_64SysV, // __attribute__((sysv_abi)) - CC_X86RegCall, // __attribute__((regcall)) - CC_AAPCS, // __attribute__((pcs("aapcs"))) - CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) - CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) - CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels - CC_Swift, // __attribute__((swiftcall)) - CC_PreserveMost, // __attribute__((preserve_most)) - CC_PreserveAll, // __attribute__((preserve_all)) + CC_X86Pascal, // __attribute__((pascal)) + CC_Win64, // __attribute__((ms_abi)) + CC_X86_64SysV, // __attribute__((sysv_abi)) + CC_X86RegCall, // __attribute__((regcall)) + CC_AAPCS, // __attribute__((pcs("aapcs"))) + CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) + CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) + CC_SpirFunction, // default for OpenCL functions on SPIR target + CC_OpenCLKernel, // inferred for OpenCL kernels + CC_Swift, // __attribute__((swiftcall)) + CC_PreserveMost, // __attribute__((preserve_most)) + CC_PreserveAll, // __attribute__((preserve_all)) + CC_CUDAKernel, // inferred for CUDA kernels }; /// \brief Checks whether the given calling convention supports variadic Index: lib/AST/ItaniumMangle.cpp =================================================================== --- lib/AST/ItaniumMangle.cpp +++ lib/AST/ItaniumMangle.cpp @@ -2628,6 +2628,7 @@ case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: + case CC_CUDAKernel: // FIXME: we should be mangling all of the above. return ""; Index: lib/AST/Type.cpp =================================================================== --- lib/AST/Type.cpp +++ lib/AST/Type.cpp @@ -2758,6 +2758,7 @@ case CC_Swift: return "swiftcall"; case CC_PreserveMost: return "preserve_most"; case CC_PreserveAll: return "preserve_all"; + case CC_CUDAKernel: return "cuda_kernel"; } llvm_unreachable("Invalid calling convention."); Index: lib/AST/TypePrinter.cpp =================================================================== --- lib/AST/TypePrinter.cpp +++ lib/AST/TypePrinter.cpp @@ -780,6 +780,10 @@ case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. break; + case CC_CUDAKernel: + // ToDo: print this before the function. + OS << " __global__"; + break; case CC_Swift: OS << " __attribute__((swiftcall))"; break; Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -64,6 +64,7 @@ case CC_PreserveMost: return llvm::CallingConv::PreserveMost; case CC_PreserveAll: return llvm::CallingConv::PreserveAll; case CC_Swift: return llvm::CallingConv::Swift; + case CC_CUDAKernel: return CGM.getTargetCodeGenInfo().getCUDAKernelCallingConv(); } } Index: lib/CodeGen/CGDebugInfo.cpp =================================================================== --- lib/CodeGen/CGDebugInfo.cpp +++ lib/CodeGen/CGDebugInfo.cpp @@ -1022,6 +1022,9 @@ return llvm::dwarf::DW_CC_LLVM_PreserveAll; case CC_X86RegCall: return llvm::dwarf::DW_CC_LLVM_X86RegCall; + case CC_CUDAKernel: + // ToDo: Add llvm::dwarf::DW_CC_LLVM_CUDAKernel; + return 0; } return 0; } Index: lib/CodeGen/TargetInfo.h =================================================================== --- lib/CodeGen/TargetInfo.h +++ lib/CodeGen/TargetInfo.h @@ -223,6 +223,9 @@ /// Get LLVM calling convention for OpenCL kernel. virtual unsigned getOpenCLKernelCallingConv() const; + /// Get LLVM calling convention for CUDA kernel. + virtual unsigned getCUDAKernelCallingConv() const; + /// Get target specific null pointer. /// \param T is the LLVM type of the null pointer. /// \param QT is the clang QualType of the null pointer. Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -431,6 +431,10 @@ return llvm::CallingConv::SPIR_KERNEL; } +unsigned TargetCodeGenInfo::getCUDAKernelCallingConv() const { + return llvm::CallingConv::C; +} + llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const { return llvm::ConstantPointerNull::get(T); @@ -7630,6 +7634,7 @@ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; + unsigned getCUDAKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; @@ -7711,6 +7716,10 @@ return llvm::CallingConv::AMDGPU_KERNEL; } +unsigned AMDGPUTargetCodeGenInfo::getCUDAKernelCallingConv() const { + return llvm::CallingConv::AMDGPU_KERNEL; +} + // Currently LLVM assumes null pointers always have value 0, // which results in incorrectly transformed IR. Therefore, instead of // emitting null pointers in private and local address spaces, a null Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -25,6 +25,7 @@ #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" @@ -1657,6 +1658,16 @@ isa(D) && NeedToCaptureVariable(cast(D), NameInfo.getLoc()); + // Drop CUDA kernel calling convention since it is invisible to the user + // in DRE. + if (const auto *FT = Ty->getAs()) { + if (FT->getCallConv() == CC_CUDAKernel) { + FT = Context.adjustFunctionType(FT, + FT->getExtInfo().withCallingConv(CC_C)); + Ty = QualType(FT, Ty.getQualifiers().getAsOpaqueValue()); + } + } + DeclRefExpr *E; if (isa(D)) { VarTemplateSpecializationDecl *VarSpec = Index: lib/Sema/SemaOverload.cpp =================================================================== --- lib/Sema/SemaOverload.cpp +++ lib/Sema/SemaOverload.cpp @@ -1481,7 +1481,6 @@ .getTypePtr()); Changed = true; } - // Convert FromFPT's ExtParameterInfo if necessary. The conversion is valid // only if the ExtParameterInfo lists of the two function prototypes can be // merged and the merged list is identical to ToFPT's ExtParameterInfo list. Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -3316,6 +3316,18 @@ CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); + // Attribute AT_CUDAGlobal affects the calling convention for AMDGPU targets. + // This is the simplest place to infer calling convention for CUDA kernels. + if (S.getLangOpts().CUDA && S.getLangOpts().CUDAIsDevice) { + for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); + Attr; Attr = Attr->getNext()) { + if (Attr->getKind() == AttributeList::AT_CUDAGlobal) { + CC = CC_CUDAKernel; + break; + } + } + } + // Attribute AT_OpenCLKernel affects the calling convention for SPIR // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer Index: test/CodeGenCUDA/kernel-amdgcn.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/kernel-amdgcn.cu @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +#include "Inputs/cuda.h" + +// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv +class A { +public: + static __global__ void kernel(){} +}; + +// CHECK: define void @_Z10non_kernelv +__device__ void non_kernel(){} + +// CHECK: define amdgpu_kernel void @_Z6kerneli +__global__ void kernel(int x) { + non_kernel(); +} + +// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_ +template +__global__ void template_kernel(T x) {} + +void launch(void *f); + +int main() { + launch((void*)A::kernel); + launch((void*)kernel); + launch((void*)template_kernel); + return 0; +} Index: tools/libclang/CXType.cpp =================================================================== --- tools/libclang/CXType.cpp +++ tools/libclang/CXType.cpp @@ -626,6 +626,7 @@ TCALLINGCONV(PreserveAll); case CC_SpirFunction: return CXCallingConv_Unexposed; case CC_OpenCLKernel: return CXCallingConv_Unexposed; + case CC_CUDAKernel: return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV