Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -10150,6 +10150,16 @@ bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD); bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD); + // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In + // case of error emits appropriate diagnostic and invalidates \p Var. + // + // \details CUDA allows only empty constructors as initializers for global + // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all + // __shared__ variables whether they are local or not (they all are implicitly + // static in CUDA). One exception is that CUDA allows constant initializers + // for __constant__ and __device__ variables. + void checkAllowedCUDAInitializer(VarDecl *Var); + /// Check whether NewFD is a valid overload for CUDA. Emits /// diagnostics and invalidates NewFD if not. void checkCUDATargetOverload(FunctionDecl *NewFD, Index: lib/Sema/SemaCUDA.cpp =================================================================== --- lib/Sema/SemaCUDA.cpp +++ lib/Sema/SemaCUDA.cpp @@ -471,6 +471,59 @@ return true; } +void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { + if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage()) + return; + const Expr *Init = VD->getInit(); + if (VD->hasAttr() || VD->hasAttr() || + VD->hasAttr()) { + assert(!VD->isStaticLocal() || VD->hasAttr()); + bool AllowedInit = false; + if (const CXXConstructExpr *CE = dyn_cast(Init)) + AllowedInit = + isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + // We'll allow constant initializers even if it's a non-empty + // constructor according to CUDA rules. This deviates from NVCC, + // but allows us to handle things like constexpr constructors. + if (!AllowedInit && + (VD->hasAttr() || VD->hasAttr())) + AllowedInit = VD->getInit()->isConstantInitializer( + Context, VD->getType()->isReferenceType()); + + // Also make sure that destructor, if there is one, is empty. + if (AllowedInit) + if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) + AllowedInit = + isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); + + if (!AllowedInit) { + Diag(VD->getLocation(), VD->hasAttr() + ? diag::err_shared_var_init + : diag::err_dynamic_var_init) + << Init->getSourceRange(); + VD->setInvalidDecl(); + } + } else { + // This is a host-side global variable. Check that the initializer is + // callable from the host side. + const FunctionDecl *InitFn = nullptr; + if (const CXXConstructExpr *CE = dyn_cast(Init)) { + InitFn = CE->getConstructor(); + } else if (const CallExpr *CE = dyn_cast(Init)) { + InitFn = CE->getDirectCallee(); + } + if (InitFn) { + CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); + if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { + Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) + << InitFnTarget << InitFn; + Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; + VD->setInvalidDecl(); + } + } + } +} + // With -fcuda-host-device-constexpr, an unattributed constexpr function is // treated as implicitly __host__ __device__, unless: // * it is a variadic function (device-side variadic functions are not Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -11629,58 +11629,8 @@ // 7.5). We must also apply the same checks to all __shared__ // variables whether they are local or not. CUDA also allows // constant initializers for __constant__ and __device__ variables. - if (getLangOpts().CUDA) { - const Expr *Init = VD->getInit(); - if (Init && VD->hasGlobalStorage()) { - if (VD->hasAttr() || VD->hasAttr() || - VD->hasAttr()) { - assert(!VD->isStaticLocal() || VD->hasAttr()); - bool AllowedInit = false; - if (const CXXConstructExpr *CE = dyn_cast(Init)) - AllowedInit = - isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); - // We'll allow constant initializers even if it's a non-empty - // constructor according to CUDA rules. This deviates from NVCC, - // but allows us to handle things like constexpr constructors. - if (!AllowedInit && - (VD->hasAttr() || VD->hasAttr())) - AllowedInit = VD->getInit()->isConstantInitializer( - Context, VD->getType()->isReferenceType()); - - // Also make sure that destructor, if there is one, is empty. - if (AllowedInit) - if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl()) - AllowedInit = - isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); - - if (!AllowedInit) { - Diag(VD->getLocation(), VD->hasAttr() - ? diag::err_shared_var_init - : diag::err_dynamic_var_init) - << Init->getSourceRange(); - VD->setInvalidDecl(); - } - } else { - // This is a host-side global variable. Check that the initializer is - // callable from the host side. - const FunctionDecl *InitFn = nullptr; - if (const CXXConstructExpr *CE = dyn_cast(Init)) { - InitFn = CE->getConstructor(); - } else if (const CallExpr *CE = dyn_cast(Init)) { - InitFn = CE->getDirectCallee(); - } - if (InitFn) { - CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); - if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) { - Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) - << InitFnTarget << InitFn; - Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; - VD->setInvalidDecl(); - } - } - } - } - } + if (getLangOpts().CUDA) + checkAllowedCUDAInitializer(VD); // Grab the dllimport or dllexport attribute off of the VarDecl. const InheritableAttr *DLLAttr = getDLLAttr(VD); Index: lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- lib/Sema/SemaTemplateInstantiateDecl.cpp +++ lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -4221,6 +4221,9 @@ ActOnUninitializedDecl(Var); } + + if (getLangOpts().CUDA) + checkAllowedCUDAInitializer(Var); } /// \brief Instantiate the definition of the given variable from its Index: test/SemaCUDA/device-var-init.cu =================================================================== --- test/SemaCUDA/device-var-init.cu +++ test/SemaCUDA/device-var-init.cu @@ -225,3 +225,20 @@ static int x = 42; // no error on device because this is never codegen'ed there. } void call_hd_emitted_host_only() { hd_emitted_host_only(); } + +// Verify that we also check field initializers in instantiated structs. +struct NontrivialInitializer { + __host__ __device__ NontrivialInitializer() : x(43) {} + int x; +}; + +template +__global__ void bar() { + __shared__ T bad; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +} + +void instantiate() { + bar<<<1, 1>>>(); +// expected-note@-1 {{in instantiation of function template specialization 'bar' requested here}} +}