Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -3608,6 +3608,9 @@
 
   MaybeHandleStaticInExternC(D, Fn);
 
+  if (D->hasAttr<CUDAGlobalAttr>())
+    getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
+
   maybeSetTrivialComdat(*D, *Fn);
 
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
Index: lib/CodeGen/TargetInfo.h
===================================================================
--- lib/CodeGen/TargetInfo.h
+++ lib/CodeGen/TargetInfo.h
@@ -301,6 +301,10 @@
   /// mangled name of functions declared within an extern "C" region and marked
   /// as 'used', and having internal linkage.
   virtual bool shouldEmitStaticExternCAliases() const { return true; }
+
+  /// Hook that lets a target set the calling convention of the LLVM function
+  /// emitted for a CUDA kernel. The default implementation does nothing.
+  virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
 };
 
 } // namespace CodeGen
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7652,6 +7652,7 @@
       llvm::Function *BlockInvokeFunc,
       llvm::Value *BlockLiteral) const override;
   bool shouldEmitStaticExternCAliases() const override;
+  void setCUDAKernelCallingConvention(llvm::Function *F) const override;
 };
 }
 
@@ -7787,6 +7788,11 @@
   return false;
 }
 
+void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
+    llvm::Function *F) const {
+  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v8 ABI Implementation.
 // Based on the SPARC Compliance Definition version 2.4.1.
Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv
+template <typename T>
+__global__ void EmptyKernel(void) {}
+
+struct Dummy {
+  /// Type definition of the EmptyKernel kernel entry point
+  typedef void (*EmptyKernelPtr)();
+  EmptyKernelPtr Empty() { return EmptyKernel<void>; }
+};
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+template <typename T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  Dummy D;
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  launch((void*)D.Empty());
+  return 0;
+}