Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -9260,6 +9260,14 @@ /// an error otherwise. bool CheckCUDAVLA(SourceLocation Loc); + /// Set __device__ or __host__ __device__ attributes on the given lambda + /// operator() method. + /// + /// CUDA lambdas declared inside __device__ or __global__ functions inherit + /// the __device__ attribute. Similarly, lambdas inside __host__ __device__ + /// functions become __host__ __device__ themselves. + void CUDASetLambdaAttrs(CXXMethodDecl *Method); + /// Finds a function in \p Matches with highest calling priority /// from \p Caller context and erases all functions with lower /// calling priority. Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -559,3 +559,22 @@ } return true; } + +void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { + if (Method->hasAttr() || Method->hasAttr()) + return; + FunctionDecl *CurFn = dyn_cast(CurContext); + if (!CurFn) + return; + CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); + if (Target == CFT_Global || Target == CFT_Device) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + } else if (Target == CFT_HostDevice) { + Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); + } + + // TODO: nvcc doesn't allow you to specify __host__ or __device__ attributes + // on lambdas in all contexts -- we should emit a compatibility warning where + // we're more permissive. +} Index: clang/lib/Sema/SemaLambda.cpp =================================================================== --- clang/lib/Sema/SemaLambda.cpp +++ clang/lib/Sema/SemaLambda.cpp @@ -886,7 +886,12 @@ // Attributes on the lambda apply to the method. ProcessDeclAttributes(CurScope, Method, ParamInfo); - + + // CUDA lambdas get implicit attributes based on the scope in which they're + // declared. + if (getLangOpts().CUDA) + CUDASetLambdaAttrs(Method); + // Introduce the function call operator as the current declaration context. PushDeclContext(CurScope, Method); Index: clang/test/SemaCUDA/implicit-device-lambda-hd.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/implicit-device-lambda-hd.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify -verify-ignore-unexpected=note \ +// RUN: -S -o /dev/null %s +// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only -verify-ignore-unexpected=note \ +// RUN: -DHOST -S -o /dev/null %s +#include "Inputs/cuda.h" + +__host__ __device__ void hd_fn() { + auto f1 = [&] {}; + f1(); // implicitly __host__ __device__ + + auto f2 = [&] __device__ {}; + f2(); +#ifdef HOST + // expected-error@-2 {{reference to __device__ function}} +#endif + + auto f3 = [&] __host__ {}; + f3(); +#ifndef HOST + // expected-error@-2 {{reference to __host__ function}} +#endif + + auto f4 = [&] __host__ __device__ {}; + f4(); +} + + Index: clang/test/SemaCUDA/implicit-device-lambda.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/implicit-device-lambda.cu @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify -fsyntax-only -verify-ignore-unexpected=note %s +// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only -verify-ignore-unexpected=note %s + +#include "Inputs/cuda.h" + +__device__ void device_fn() { + auto f1 = [&] {}; + f1(); // implicitly __device__ + + auto f2 = [&] __device__ {}; + f2(); + + auto f3 = [&] __host__ {}; + f3(); // expected-error {{no matching function}} + + auto f4 = [&] __host__ __device__ {}; + f4(); + + // Now do it all again with '()'s in the lambda declarations: This is a + // different parse path. + auto g1 = [&]() {}; + g1(); // implicitly __device__ + + auto g2 = [&]() __device__ {}; + g2(); + + auto g3 = [&]() __host__ {}; + g3(); // expected-error {{no matching function}} + + auto g4 = [&]() __host__ __device__ {}; + g4(); + + // Once more, with the '()'s in a different place. + auto h1 = [&]() {}; + h1(); // implicitly __device__ + + auto h2 = [&] __device__ () {}; + h2(); + + auto h3 = [&] __host__ () {}; + h3(); // expected-error {{no matching function}} + + auto h4 = [&] __host__ __device__ () {}; + h4(); +} + +// Behaves identically to device_fn. +__global__ void kernel_fn() { + auto f1 = [&] {}; + f1(); // implicitly __device__ + + auto f2 = [&] __device__ {}; + f2(); + + auto f3 = [&] __host__ {}; + f3(); // expected-error {{no matching function}} + + auto f4 = [&] __host__ __device__ {}; + f4(); + + // No need to re-test all the parser contortions we test in the device + // function. +} + +__host__ void host_fn() { + auto f1 = [&] {}; + f1(); // implicitly __host__ (i.e., no magic) + + auto f2 = [&] __device__ {}; + f2(); // expected-error {{no matching function}} + + auto f3 = [&] __host__ {}; + f3(); + + auto f4 = [&] __host__ __device__ {}; + f4(); +} + +// The special treatment above only applies to lambdas. +__device__ void foo() { + struct X { + void foo() {} + }; + X x; + x.foo(); // expected-error {{reference to __host__ function 'foo' in __device__ function}} +}