Emit CUDA kernels with the AMDGPU_KERNEL calling convention on amdgcn.

When the target triple's architecture is amdgcn, a function definition that
carries CUDAGlobalAttr has its calling convention set to
llvm::CallingConv::AMDGPU_KERNEL. The new test checks that __global__
functions (static member, free function, template instantiation) are defined
as amdgpu_kernel and that a __device__ function is not.

NOTE(review): this patch was recovered from a whitespace-collapsed copy in
which angle-bracket spans had been stripped. The template argument lists
(hasAttr<CUDAGlobalAttr>, template<class T>, template_kernel<A>) and the
blank context lines / indentation of the first hunk were reconstructed;
confirm them against the target tree before applying.

Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -3590,6 +3590,9 @@
 
   MaybeHandleStaticInExternC(D, Fn);
 
+  if ((getTriple().getArch() == llvm::Triple::amdgcn) &&
+      D->hasAttr<CUDAGlobalAttr>())
+    Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
   maybeSetTrivialComdat(*D, *Fn);
 
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv()
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv()
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli(i32 %x)
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_()
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  return 0;
+}