Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6640,6 +6640,9 @@ def err_ref_bad_target : Error< "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " "function %1 in %select{__device__|__global__|__host__|__host__ __device__}2 function">; +def err_ref_bad_target_global_initializer : Error< + "reference to %select{__device__|__global__|__host__|__host__ __device__}0 " + "function %1 in global initializer">; def warn_kern_is_method : Extension< "kernel function %0 is a member function; this may not be accepted by nvcc">, InGroup; Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -10728,36 +10728,55 @@ // 7.5). We must also apply the same checks to all __shared__ // variables whether they are local or not. CUDA also allows // constant initializers for __constant__ and __device__ variables. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + if (getLangOpts().CUDA) { const Expr *Init = VD->getInit(); - if (Init && VD->hasGlobalStorage() && - (VD->hasAttr() || VD->hasAttr() || - VD->hasAttr())) { - assert((!VD->isStaticLocal() || VD->hasAttr())); - bool AllowedInit = false; - if (const CXXConstructExpr *CE = dyn_cast(Init)) - AllowedInit = - isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); - // We'll allow constant initializers even if it's a non-empty - // constructor according to CUDA rules. This deviates from NVCC, - // but allows us to handle things like constexpr constructors. - if (!AllowedInit && - (VD->hasAttr() || VD->hasAttr())) - AllowedInit = VD->getInit()->isConstantInitializer( - Context, VD->getType()->isReferenceType()); - - // Also make sure that destructor, if there is one, is empty. - if (AllowedInit) - if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) + if (Init && VD->hasGlobalStorage()) { + if (VD->hasAttr() || VD->hasAttr() || + VD->hasAttr()) { + assert((!VD->isStaticLocal() || VD->hasAttr())); + bool AllowedInit = false; + if (const CXXConstructExpr *CE = dyn_cast(Init)) AllowedInit = - isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); + isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + // We'll allow constant initializers even if it's a non-empty + // constructor according to CUDA rules. This deviates from NVCC, + // but allows us to handle things like constexpr constructors. + if (!AllowedInit && + (VD->hasAttr() || VD->hasAttr())) + AllowedInit = VD->getInit()->isConstantInitializer( + Context, VD->getType()->isReferenceType()); - if (!AllowedInit) { - Diag(VD->getLocation(), VD->hasAttr() - ? diag::err_shared_var_init - : diag::err_dynamic_var_init) - << Init->getSourceRange(); - VD->setInvalidDecl(); + // Also make sure that destructor, if there is one, is empty. + if (AllowedInit) + if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) + AllowedInit = + isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); + + if (!AllowedInit) { + Diag(VD->getLocation(), VD->hasAttr() + ? diag::err_shared_var_init + : diag::err_dynamic_var_init) + << Init->getSourceRange(); + VD->setInvalidDecl(); + } + } else { + // This is a host-side global variable. Check that the initializer is + // callable from the host side. + const FunctionDecl *InitFn = nullptr; + if (const CXXConstructExpr *CE = dyn_cast(Init)) { + InitFn = CE->getConstructor(); + } else if (const CallExpr *CE = dyn_cast(Init)) { + InitFn = CE->getDirectCallee(); + } + if (InitFn) { + CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); + if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { + Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) + << InitFnTarget << InitFn; + Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; + VD->setInvalidDecl(); + } + } } } } Index: clang/test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- clang/test/SemaCUDA/call-device-fn-from-host.cu +++ clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -emit-llvm -o - -verify +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: clang/test/SemaCUDA/global-initializers-host.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/global-initializers-host.cu @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -emit-llvm -o - -verify + +#include "Inputs/cuda.h" + +// Check that we get an error if we try to call a __device__ function from a +// module initializer. + +struct S { + __device__ S() {} + // expected-note@-1 {{'S' declared here}} +}; + +S s; +// expected-error@-1 {{reference to __device__ function 'S' in global initializer}} + +struct T { + __host__ __device__ T() {} +}; +T t; // No error, this is OK. + +struct U { + __host__ U() {} + __device__ U(int) {} + // expected-note@-1 {{'U' declared here}} +}; +U u(42); +// expected-error@-1 {{reference to __device__ function 'U' in global initializer}} + +__device__ int device_fn() { return 42; } +// expected-note@-1 {{'device_fn' declared here}} +int n = device_fn(); +// expected-error@-1 {{reference to __device__ function 'device_fn' in global initializer}}