Index: lib/Sema/SemaCUDA.cpp =================================================================== --- lib/Sema/SemaCUDA.cpp +++ lib/Sema/SemaCUDA.cpp @@ -675,6 +675,11 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); assert(Callee && "Callee may not be null."); + + auto &ExprEvalCtx = ExprEvalContexts.back(); + if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) + return true; + // FIXME: Is bailing out early correct here? Should we instead assume that // the caller is a global initializer? FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -14799,6 +14799,9 @@ if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); + if (getLangOpts().CUDA) + CheckCUDACall(Loc, Func); + // If we don't need to mark the function as used, and we don't need to // try to provide a definition, there's nothing more to do. if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) && Index: test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- test/SemaCUDA/call-device-fn-from-host.cu +++ test/SemaCUDA/call-device-fn-from-host.cu @@ -37,7 +37,7 @@ } template __host__ __device__ void hd2() { device_fn(); } -// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}} +// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}} void host_fn() { hd2(); } __host__ __device__ void hd() { device_fn(); } @@ -90,3 +90,8 @@ static __host__ __device__ void hd_func() { device_fn(); } __global__ void kernel() { hd_func(); } void host_func(void) { kernel<<<1, 1>>>(); } + +// Should allow host function call kernel template with device function argument. 
+__device__ void f(); +template <void (*F)()> __global__ void t() { F(); } +__host__ void g() { t<f><<<1,1>>>(); } Index: test/SemaCUDA/call-host-fn-from-device.cu =================================================================== --- test/SemaCUDA/call-host-fn-from-device.cu +++ test/SemaCUDA/call-host-fn-from-device.cu @@ -56,14 +56,14 @@ } template __host__ __device__ void hd2() { host_fn(); } -// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} +// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} __global__ void kernel() { hd2(); } __host__ __device__ void hd() { host_fn(); } // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} template __host__ __device__ void hd3() { host_fn(); } -// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} +// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} __device__ void device_fn() { hd3(); } // No error because this is never instantiated.