Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -718,6 +718,11 @@ FunctionDecl *CurFn = dyn_cast(CurContext); if (!CurFn) return; + if (getLangOpts().HIP) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); + return; + } CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); if (Target == CFT_Global || Target == CFT_Device) { Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/lambda.cu @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Device side kernel name. +// HOST: @[[KERN:[0-9]+]] = private unnamed_addr constant [22 x i8] c"_Z1gIZ4mainEUlvE_EvT_\00" + +// Template instantiation for h. +// HOST-LABEL: define internal void @_Z1hIZ4mainEUlvE_EvT_ + +// HOST-LABEL: define internal void @_Z16__device_stub__gIZ4mainEUlvE_EvT_ + +// Check kernel is registered with correct device side kernel name. +// HOST: @__hipRegisterFunction(i8** %0, i8* bitcast ({{.*}}@[[KERN]] + +// Check lambda is not emitted in host compilation. +// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv + +// DEV: @a = addrspace(1) externally_initialized global i32 0 + +// Check kernel is calling lambda function. +// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ4mainEUlvE_EvT_ +// DEV: call void @_ZZ4mainENKUlvE_clEv + +// Check lambda is emitted in device compilation and accessind device variable. +// DEV-LABEL: define internal void @_ZZ4mainENKUlvE_clEv +// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*) +template +__global__ void g(F f) { f(); } + +template +void h(F f) { g<<<1,1>>>(f); } + +__device__ int a; + +int main(void) { + h([&](){ a=1;}); +} Index: clang/test/SemaCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/lambda.cu @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +__device__ int a; + +int main(void) { + auto lambda_kernel = [&]__global__(){ a = 1;}; + // expected-error@-1 {{kernel function 'operator()' must be a free function or static member function}} + lambda_kernel<<<1, 1>>>(); + return 0; +}