Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -718,6 +718,11 @@ FunctionDecl *CurFn = dyn_cast(CurContext); if (!CurFn) return; + if (getLangOpts().HIP) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); + return; + } CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); if (Target == CFT_Global || Target == CFT_Device) { Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); Index: clang/test/CodeGenCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/lambda.cu @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Device side kernel name. +// HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00" +// HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00" + +// Check functions emitted for test_capture in host compilation. +// Check lambda is not emitted in host compilation. +// HOST-LABEL: define void @_Z12test_capturev +// HOST: call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_ +// HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_ +// HOST: call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_ +// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv + +// Check functions emitted for test_resolve in host compilation. +// Check host version of template function 'overloaded' is emitted and called +// by the lambda function. +// HOST-LABEL: define void @_Z12test_resolvev +// HOST: call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_() +// HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_ +// HOST: call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_ +// HOST: call void @_ZZ12test_resolvevENKUlvE_clEv +// HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv +// HOST: call i32 @_Z10overloadedIiET_v +// HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v +// HOST: ret i32 2 + +// Check kernel is registered with correct device side kernel name. +// HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]] +// HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]] + +// DEV: @a = addrspace(1) externally_initialized global i32 0 + +// Check functions emitted for test_capture in device compilation. +// Check lambda is emitted in device compilation and accessing device variable. +// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_ +// DEV: call void @_ZZ12test_capturevENKUlvE_clEv +// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv +// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*) + +// Check functions emitted for test_resolve in device compilation. +// Check device version of template function 'overloaded' is emitted and called +// by the lambda function. +// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_ +// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv +// DEV-LABE: define internal void @_ZZ12test_resolvevENKUlvE_clEv +// DEV: call i32 @_Z10overloadedIiET_v +// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v +// DEV: ret i32 1 + +__device__ int a; + +template +__device__ T overloaded() { return 1; } + +template +__host__ T overloaded() { return 2; } + +template +__global__ void g(F f) { f(); } + +template +void test_capture_helper(F f) { g<<<1,1>>>(f); } + +template +void test_resolve_helper(F f) { g<<<1,1>>>(f); f(); } + +// Test capture of device variable in lambda function. +void test_capture(void) { + test_capture_helper([&](){ a = 1;}); +} + +// Test resolving host/device function in lambda function. +// Callee should resolve to correct host/device function based on where +// the lambda function is called, not where it is defined. +void test_resolve(void) { + test_resolve_helper([&](){ overloaded();}); +} Index: clang/test/SemaCUDA/lambda.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/lambda.cu @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +__device__ int a; + +int main(void) { + auto lambda_kernel = [&]__global__(){ a = 1;}; + // expected-error@-1 {{kernel function 'operator()' must be a free function or static member function}} + lambda_kernel<<<1, 1>>>(); + return 0; +}