Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -9223,16 +9223,27 @@ void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD, const LookupResult &Previous); +private: + /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a + /// deferred "bad call" diagnostic. We use this to avoid emitting the same + /// deferred diag twice. + llvm::DenseSet LocsWithCUDACallDeferredDiags; + +public: /// Check whether we're allowed to call Callee from the current context. /// - /// If the call is never allowed in a semantically-correct program - /// (CFP_Never), emits an error and returns false. + /// - If the call is never allowed in a semantically-correct program + /// (CFP_Never), emits an error and returns false. /// - /// If the call is allowed in semantically-correct programs, but only if it's - /// never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to be - /// emitted if and when the caller is codegen'ed, and returns true. + /// - If the call is allowed in semantically-correct programs, but only if + /// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to + /// be emitted if and when the caller is codegen'ed, and returns true. /// - /// Otherwise, returns true without emitting any diagnostics. + /// Will only create deferred diagnostics for a given SourceLocation once, + /// so you can safely call this multiple times without generating duplicate + /// deferred errors. + /// + /// - Otherwise, returns true without emitting any diagnostics. bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee); /// Finds a function in \p Matches with highest calling priority Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2895,6 +2895,10 @@ // non-error diags here, because order can be significant, e.g. with notes // that follow errors.) auto Diags = D->takeDeferredDiags(); + if (auto *Templ = D->getPrimaryTemplate()) { + auto TemplDiags = Templ->getAsFunction()->takeDeferredDiags(); + Diags.insert(Diags.end(), TemplDiags.begin(), TemplDiags.end()); + } bool HasError = llvm::any_of(Diags, [this](const PartialDiagnosticAt &PDAt) { return getDiags().getDiagnosticLevel(PDAt.second.getDiagID(), PDAt.first) >= DiagnosticsEngine::Error; Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -496,7 +496,13 @@ Diag(Callee->getLocation(), diag::note_previous_decl) << Callee; return false; } - if (Pref == Sema::CFP_WrongSide) { + + // Insert into LocsWithCUDADeferredDiags to avoid emitting duplicate deferred + // diagnostics for the same location. Duplicate deferred diags are otherwise + // tricky to avoid, because, unlike with regular errors, sema checking + // proceeds unhindered when we omit a deferred diagnostic. + if (Pref == Sema::CFP_WrongSide && + LocsWithCUDACallDeferredDiags.insert(Loc.getRawEncoding()).second) { // We have to do this odd dance to create our PartialDiagnostic because we // want its storage to be allocated with operator new, not in an arena. PartialDiagnostic ErrPD{PartialDiagnostic::NullDiagnostic()}; Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -374,6 +374,9 @@ if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; + + if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) + return true; } // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions @@ -1743,11 +1746,6 @@ const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs) { - if (getLangOpts().CUDA) - if (FunctionDecl *Callee = dyn_cast(D)) - if (!CheckCUDACall(NameInfo.getLoc(), Callee)) - return ExprError(); - bool RefersToCapturedVariable = isa(D) && NeedToCaptureVariable(cast(D), NameInfo.getLoc()); @@ -5140,35 +5138,36 @@ return Callee->getMinRequiredArguments() <= NumArgs; } -static ExprResult ActOnCallExprImpl(Sema &S, Scope *Scope, Expr *Fn, - SourceLocation LParenLoc, - MultiExprArg ArgExprs, - SourceLocation RParenLoc, Expr *ExecConfig, - bool IsExecConfig) { +/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. +/// This provides the location of the left/right parens and a list of comma +/// locations. +ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, + MultiExprArg ArgExprs, SourceLocation RParenLoc, + Expr *ExecConfig, bool IsExecConfig) { // Since this might be a postfix expression, get rid of ParenListExprs. - ExprResult Result = S.MaybeConvertParenListExprToParenExpr(Scope, Fn); + ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn); if (Result.isInvalid()) return ExprError(); Fn = Result.get(); - if (checkArgsForPlaceholders(S, ArgExprs)) + if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); - if (S.getLangOpts().CPlusPlus) { + if (getLangOpts().CPlusPlus) { // If this is a pseudo-destructor expression, build the call immediately. if (isa(Fn)) { if (!ArgExprs.empty()) { // Pseudo-destructor calls should not have any arguments. - S.Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args) + Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args) << FixItHint::CreateRemoval( SourceRange(ArgExprs.front()->getLocStart(), ArgExprs.back()->getLocEnd())); } - return new (S.Context) - CallExpr(S.Context, Fn, None, S.Context.VoidTy, VK_RValue, RParenLoc); + return new (Context) + CallExpr(Context, Fn, None, Context.VoidTy, VK_RValue, RParenLoc); } - if (Fn->getType() == S.Context.PseudoObjectTy) { - ExprResult result = S.CheckPlaceholderExpr(Fn); + if (Fn->getType() == Context.PseudoObjectTy) { + ExprResult result = CheckPlaceholderExpr(Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } @@ -5183,35 +5182,34 @@ if (Dependent) { if (ExecConfig) { - return new (S.Context) CUDAKernelCallExpr( - S.Context, Fn, cast(ExecConfig), ArgExprs, - S.Context.DependentTy, VK_RValue, RParenLoc); + return new (Context) CUDAKernelCallExpr( + Context, Fn, cast(ExecConfig), ArgExprs, + Context.DependentTy, VK_RValue, RParenLoc); } else { - return new (S.Context) - CallExpr(S.Context, Fn, ArgExprs, S.Context.DependentTy, VK_RValue, - RParenLoc); + return new (Context) CallExpr( + Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); } } // Determine whether this is a call to an object (C++ [over.call.object]). if (Fn->getType()->isRecordType()) - return S.BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs, - RParenLoc); + return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs, + RParenLoc); - if (Fn->getType() == S.Context.UnknownAnyTy) { - ExprResult result = rebuildUnknownAnyFunction(S, Fn); + if (Fn->getType() == Context.UnknownAnyTy) { + ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } - if (Fn->getType() == S.Context.BoundMemberTy) { - return S.BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, - RParenLoc); + if (Fn->getType() == Context.BoundMemberTy) { + return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, + RParenLoc); } } // Check for overloaded calls. This can happen even in C due to extensions. - if (Fn->getType() == S.Context.OverloadTy) { + if (Fn->getType() == Context.OverloadTy) { OverloadExpr::FindResult find = OverloadExpr::find(Fn); // We aren't supposed to apply this logic for if there'Scope an '&' @@ -5219,17 +5217,17 @@ if (!find.HasFormOfMemberPointer) { OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) - return S.BuildOverloadedCallExpr( + return BuildOverloadedCallExpr( Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); - return S.BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, - RParenLoc); + return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs, + RParenLoc); } } // If we're directly calling a function, get the appropriate declaration. - if (Fn->getType() == S.Context.UnknownAnyTy) { - ExprResult result = rebuildUnknownAnyFunction(S, Fn); + if (Fn->getType() == Context.UnknownAnyTy) { + ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } @@ -5254,10 +5252,10 @@ // with no explicit address space with the address space of the arguments // in ArgExprs. if ((FDecl = - rewriteBuiltinFunctionDecl(&S, S.Context, FDecl, ArgExprs))) { + rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create( - S.Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, + Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl); } } @@ -5266,8 +5264,8 @@ if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && - !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, - Fn->getLocStart())) + !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, + Fn->getLocStart())) return ExprError(); // CheckEnableIf assumes that the we're passing in a sane number of args for @@ -5277,42 +5275,22 @@ // number of args looks incorrect, don't do enable_if checks; we should've // already emitted an error about the bad call. if (FD->hasAttr() && - isNumberOfArgsValidForCall(S, FD, ArgExprs.size())) { - if (const EnableIfAttr *Attr = S.CheckEnableIf(FD, ArgExprs, true)) { - S.Diag(Fn->getLocStart(), - isa(FD) - ? diag::err_ovl_no_viable_member_function_in_call - : diag::err_ovl_no_viable_function_in_call) + isNumberOfArgsValidForCall(*this, FD, ArgExprs.size())) { + if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) { + Diag(Fn->getLocStart(), + isa(FD) + ? diag::err_ovl_no_viable_member_function_in_call + : diag::err_ovl_no_viable_function_in_call) << FD << FD->getSourceRange(); - S.Diag(FD->getLocation(), - diag::note_ovl_candidate_disabled_by_enable_if_attr) + Diag(FD->getLocation(), + diag::note_ovl_candidate_disabled_by_enable_if_attr) << Attr->getCond()->getSourceRange() << Attr->getMessage(); } } } - return S.BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, - ExecConfig, IsExecConfig); -} - -/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. -/// This provides the location of the left/right parens and a list of comma -/// locations. -ExprResult Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, - MultiExprArg ArgExprs, SourceLocation RParenLoc, - Expr *ExecConfig, bool IsExecConfig) { - ExprResult Ret = ActOnCallExprImpl(*this, S, Fn, LParenLoc, ArgExprs, - RParenLoc, ExecConfig, IsExecConfig); - - // If appropriate, check that this is a valid CUDA call (and emit an error if - // the call is not allowed). - if (getLangOpts().CUDA && Ret.isUsable()) - if (auto *Call = dyn_cast(Ret.get())) - if (auto *FD = Call->getDirectCallee()) - if (!CheckCUDACall(Call->getLocStart(), FD)) - return ExprError(); - - return Ret; + return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, + ExecConfig, IsExecConfig); } /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments. Index: clang/test/SemaCUDA/Inputs/cuda.h =================================================================== --- clang/test/SemaCUDA/Inputs/cuda.h +++ clang/test/SemaCUDA/Inputs/cuda.h @@ -22,7 +22,9 @@ int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, cudaStream_t stream = 0); -// Device-side placement new overloads. +// Host- and device-side placement new overloads. +void *operator new(__SIZE_TYPE__, void *p) { return p; } +void *operator new[](__SIZE_TYPE__, void *p) { return p; } __device__ void *operator new(__SIZE_TYPE__, void *p) { return p; } __device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; } Index: clang/test/SemaCUDA/call-host-fn-from-device.cu =================================================================== --- clang/test/SemaCUDA/call-host-fn-from-device.cu +++ clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -12,6 +12,9 @@ // expected-note@-4 {{'host_fn' declared here}} // expected-note@-5 {{'host_fn' declared here}} // expected-note@-6 {{'host_fn' declared here}} +// expected-note@-7 {{'host_fn' declared here}} + +struct Dummy {}; struct S { S() {} @@ -34,6 +37,15 @@ void h() {} // expected-note@-1 {{'h' declared here}} + + void operator+(); + // expected-note@-1 {{'operator+' declared here}} + + void operator-(const T&) {} + // expected-note@-1 {{'operator-' declared here}} + + operator Dummy() { return Dummy(); } + // expected-note@-1 {{'operator Dummy' declared here}} }; __host__ __device__ void T::hd3() { @@ -92,3 +104,30 @@ __host__ __device__ void fn_ptr_template() { auto* ptr = &host_fn; // Not an error because the template isn't instantiated. } + +__host__ __device__ void unaryOp() { + T t; + (void) +t; // expected-error {{reference to __host__ function 'operator+' in __host__ __device__ function}} +} + +__host__ __device__ void binaryOp() { + T t; + (void) (t - t); // expected-error {{reference to __host__ function 'operator-' in __host__ __device__ function}} +} + +__host__ __device__ void implicitConversion() { + T t; + Dummy d = t; // expected-error {{reference to __host__ function 'operator Dummy' in __host__ __device__ function}} +} + +template +struct TmplStruct { + template __host__ __device__ void fn() {} +}; + +template <> +template <> +__host__ __device__ void TmplStruct::fn() { host_fn(); } +// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}} + +__device__ void double_specialization() { TmplStruct().fn(); }