Index: cfe/trunk/include/clang/Sema/Sema.h
===================================================================
--- cfe/trunk/include/clang/Sema/Sema.h
+++ cfe/trunk/include/clang/Sema/Sema.h
@@ -9264,6 +9264,14 @@
   /// an error otherwise.
   bool CheckCUDAVLA(SourceLocation Loc);
 
+  /// Set __device__ or __host__ __device__ attributes on the given lambda
+  /// operator() method.
+  ///
+  /// CUDA lambdas declared inside __device__ or __global__ functions inherit
+  /// the __device__ attribute.  Similarly, lambdas inside __host__ __device__
+  /// functions become __host__ __device__ themselves.
+  void CUDASetLambdaAttrs(CXXMethodDecl *Method);
+
   /// Finds a function in \p Matches with highest calling priority
   /// from \p Caller context and erases all functions with lower
   /// calling priority.
Index: cfe/trunk/lib/Sema/SemaCUDA.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp
@@ -559,3 +559,22 @@
   }
   return true;
 }
+
+void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
+  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
+    return;
+  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
+  if (!CurFn)
+    return;
+  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
+  if (Target == CFT_Global || Target == CFT_Device) {
+    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  } else if (Target == CFT_HostDevice) {
+    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
+  }
+
+  // TODO: nvcc doesn't allow you to specify __host__ or __device__ attributes
+  // on lambdas in all contexts -- we should emit a compatibility warning where
+  // we're more permissive.
+}
Index: cfe/trunk/lib/Sema/SemaLambda.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaLambda.cpp
+++ cfe/trunk/lib/Sema/SemaLambda.cpp
@@ -886,7 +886,12 @@
   
   // Attributes on the lambda apply to the method.  
   ProcessDeclAttributes(CurScope, Method, ParamInfo);
-  
+
+  // CUDA lambdas get implicit attributes based on the scope in which they're
+  // declared.
+  if (getLangOpts().CUDA)
+    CUDASetLambdaAttrs(Method);
+
   // Introduce the function call operator as the current declaration context.
   PushDeclContext(CurScope, Method);
     
Index: cfe/trunk/test/SemaCUDA/implicit-device-lambda-hd.cu
===================================================================
--- cfe/trunk/test/SemaCUDA/implicit-device-lambda-hd.cu
+++ cfe/trunk/test/SemaCUDA/implicit-device-lambda-hd.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify -verify-ignore-unexpected=note \
+// RUN:   -S -o /dev/null %s
+// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only -verify-ignore-unexpected=note \
+// RUN:   -DHOST -S -o /dev/null %s
+#include "Inputs/cuda.h"
+
+__host__ __device__ void hd_fn() {
+  auto f1 = [&] {};
+  f1(); // implicitly __host__ __device__
+
+  auto f2 = [&] __device__ {};
+  f2();
+#ifdef HOST
+  // expected-error@-2 {{reference to __device__ function}}
+#endif
+
+  auto f3 = [&] __host__ {};
+  f3();
+#ifndef HOST
+  // expected-error@-2 {{reference to __host__ function}}
+#endif
+
+  auto f4 = [&] __host__ __device__ {};
+  f4();
+}
+
+
Index: cfe/trunk/test/SemaCUDA/implicit-device-lambda.cu
===================================================================
--- cfe/trunk/test/SemaCUDA/implicit-device-lambda.cu
+++ cfe/trunk/test/SemaCUDA/implicit-device-lambda.cu
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -verify -fsyntax-only -verify-ignore-unexpected=note %s
+// RUN: %clang_cc1 -std=c++11 -verify -fsyntax-only -verify-ignore-unexpected=note %s
+
+#include "Inputs/cuda.h"
+
+__device__ void device_fn() {
+  auto f1 = [&] {};
+  f1(); // implicitly __device__
+
+  auto f2 = [&] __device__ {};
+  f2();
+
+  auto f3 = [&] __host__ {};
+  f3();  // expected-error {{no matching function}}
+
+  auto f4 = [&] __host__ __device__ {};
+  f4();
+
+  // Now do it all again with '()'s in the lambda declarations: This is a
+  // different parse path.
+  auto g1 = [&]() {};
+  g1(); // implicitly __device__
+
+  auto g2 = [&]() __device__ {};
+  g2();
+
+  auto g3 = [&]() __host__ {};
+  g3();  // expected-error {{no matching function}}
+
+  auto g4 = [&]() __host__ __device__ {};
+  g4();
+
+  // Once more, with the '()'s in a different place.
+  auto h1 = [&]() {};
+  h1(); // implicitly __device__
+
+  auto h2 = [&] __device__ () {};
+  h2();
+
+  auto h3 = [&] __host__ () {};
+  h3();  // expected-error {{no matching function}}
+
+  auto h4 = [&] __host__ __device__ () {};
+  h4();
+}
+
+// Behaves identically to device_fn.
+__global__ void kernel_fn() {
+  auto f1 = [&] {};
+  f1(); // implicitly __device__
+
+  auto f2 = [&] __device__ {};
+  f2();
+
+  auto f3 = [&] __host__ {};
+  f3();  // expected-error {{no matching function}}
+
+  auto f4 = [&] __host__ __device__ {};
+  f4();
+
+  // No need to re-test all the parser contortions we test in the device
+  // function.
+}
+
+__host__ void host_fn() {
+  auto f1 = [&] {};
+  f1(); // implicitly __host__ (i.e., no magic)
+
+  auto f2 = [&] __device__ {};
+  f2();  // expected-error {{no matching function}}
+
+  auto f3 = [&] __host__ {};
+  f3();
+
+  auto f4 = [&] __host__ __device__ {};
+  f4();
+}
+
+// The special treatment above only applies to lambdas.
+__device__ void foo() {
+  struct X {
+    void foo() {}
+  };
+  X x;
+  x.foo(); // expected-error {{reference to __host__ function 'foo' in __device__ function}}
+}