Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -577,8 +577,22 @@ (T == Sema::CFT_Device || T == Sema::CFT_Global)) return false; - // Externally-visible and similar functions are always emitted. - if (!isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(FD))) + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will be + // emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + // We may currently be parsing the body of FD, in which case + // FD->getDefinition() will be null, but we still want to treat FD as though + // it's a definition. + if (!Def && FD->willHaveBody()) + Def = FD; + + if (Def && + !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) return true; // Otherwise, the function is known-emitted if it's in our set of Index: cfe/trunk/test/SemaCUDA/add-inline-in-definition.cu =================================================================== --- cfe/trunk/test/SemaCUDA/add-inline-in-definition.cu +++ cfe/trunk/test/SemaCUDA/add-inline-in-definition.cu @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +#ifndef __CUDA_ARCH__ +// expected-no-diagnostics +#endif + +// When compiling for device, foo()'s call to host_fn() is an error, because +// foo() is known-emitted. +// +// The trickiness here comes from the fact that the FunctionDecl bar() sees +// foo() does not have the "inline" keyword, so we might incorrectly think that +// foo() is a priori known-emitted. This would prevent us from marking foo() +// as known-emitted when we see the call from bar() to foo(), which would +// prevent us from emitting an error for foo()'s call to host_fn() when we +// eventually see it. + +void host_fn() {} +#ifdef __CUDA_ARCH__ + // expected-note@-2 {{declared here}} +#endif + +__host__ __device__ void foo(); +__device__ void bar() { + foo(); +#ifdef __CUDA_ARCH__ + // expected-note@-2 {{called by 'bar'}} +#endif +} +inline __host__ __device__ void foo() { + host_fn(); +#ifdef __CUDA_ARCH__ + // expected-error@-2 {{reference to __host__ function}} +#endif +} + +// This is similar to the above, except there's no error here. This code used +// to trip an assertion due to us noticing, when emitting the definition of +// boom(), that T::operator S() was (incorrectly) considered a priori +// known-emitted. +struct S {}; +struct T { + __device__ operator S() const; +}; +__device__ inline T::operator S() const { return S(); } + +__device__ T t; +__device__ void boom() { + S s = t; +}