Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6704,6 +6704,7 @@ def err_deleted_inherited_ctor_use : Error< "constructor inherited by %0 from base class %1 is implicitly deleted">; +def note_called_by : Note<"called by %0">; def err_kern_type_not_void_return : Error< "kernel function type %0 must have void return type">; def err_kern_is_nonstatic_method : Error< Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -9249,26 +9249,42 @@ /// Diagnostics that are emitted only if we discover that the given function /// must be codegen'ed. Because handling these correctly adds overhead to /// compilation, this is currently only enabled for CUDA compilations. - llvm::DenseMap<FunctionDecl *, std::vector<PartialDiagnosticAt>> + llvm::DenseMap<CanonicalDeclPtr<FunctionDecl>, + std::vector<PartialDiagnosticAt>> CUDADeferredDiags; /// FunctionDecls plus raw encodings of SourceLocations for which /// CheckCUDACall has emitted a (maybe deferred) "bad call" diagnostic. We /// use this to avoid emitting the same deferred diag twice. - llvm::DenseSet<std::pair<FunctionDecl *, unsigned>> LocsWithCUDACallDiags; + llvm::DenseSet<std::pair<CanonicalDeclPtr<FunctionDecl>, unsigned>> + LocsWithCUDACallDiags; - /// The set of CUDA functions that we've discovered must be emitted by tracing - /// the call graph. Functions that we can tell a priori must be emitted - /// aren't added to this set. - llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns; + /// A pair of a canonical FunctionDecl and a SourceLocation. + struct FunctionDeclAndLoc { + CanonicalDeclPtr<FunctionDecl> FD; + SourceLocation Loc; + }; + + /// An inverse call graph, mapping known-emitted functions to one of their + /// known-emitted callers (plus the location of the call). + /// + /// Functions that we can tell a priori must be emitted aren't added to this + /// map. 
+ llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>, + /* Caller = */ FunctionDeclAndLoc> + CUDAKnownEmittedFns; /// A partial call graph maintained during CUDA compilation to support - /// deferred diagnostics. Specifically, functions are only added here if, at - /// the time they're added, they are not known-emitted. As soon as we - /// discover that a function is known-emitted, we remove it and everything it - /// transitively calls from this set and add those functions to - /// CUDAKnownEmittedFns. - llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph; + /// deferred diagnostics. + /// + /// Functions are only added here if, at the time they're considered, they are + /// not known-emitted. As soon as we discover that a function is + /// known-emitted, we remove it and everything it transitively calls from this + /// set and add those functions to CUDAKnownEmittedFns. + llvm::DenseMap</* Caller = */ CanonicalDeclPtr<FunctionDecl>, + /* Callees = */ llvm::MapVector<CanonicalDeclPtr<FunctionDecl>, + SourceLocation>> + CUDACallGraph; /// Diagnostic builder for CUDA errors which may or may not be deferred. /// @@ -9291,13 +9307,19 @@ K_Nop, /// Emit the diagnostic immediately (i.e., behave like Sema::Diag()). K_Immediate, + /// Emit the diagnostic immediately, and, if it's a warning or error, also + /// emit a call stack showing how this function can be reached by an a + /// priori known-emitted function. + K_ImmediateWithCallStack, /// Create a deferred diagnostic, which is emitted only if the function - /// it's attached to is codegen'ed. + /// it's attached to is codegen'ed. Also emit a call stack as with + /// K_ImmediateWithCallStack. K_Deferred }; CUDADiagBuilder(Kind K, SourceLocation Loc, unsigned DiagID, FunctionDecl *Fn, Sema &S); + ~CUDADiagBuilder(); /// Convertible to bool: True if we immediately emitted an error, false if /// we didn't emit an error or we created a deferred error. @@ -9309,38 +9331,29 @@ /// /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably /// want to use these instead of creating a CUDADiagBuilder yourself. 
- operator bool() const { return ImmediateDiagBuilder.hasValue(); } + operator bool() const { return ImmediateDiag.hasValue(); } template <typename T> friend const CUDADiagBuilder &operator<<(const CUDADiagBuilder &Diag, const T &Value) { - if (Diag.ImmediateDiagBuilder.hasValue()) - *Diag.ImmediateDiagBuilder << Value; - else if (Diag.PartialDiagInfo.hasValue()) - Diag.PartialDiagInfo->PD << Value; + if (Diag.ImmediateDiag.hasValue()) + *Diag.ImmediateDiag << Value; + else if (Diag.PartialDiag.hasValue()) + *Diag.PartialDiag << Value; return Diag; } private: - struct PartialDiagnosticInfo { - PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD, - FunctionDecl *Fn) - : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {} - - ~PartialDiagnosticInfo() { - S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)}); - } - - Sema &S; - SourceLocation Loc; - PartialDiagnostic PD; - FunctionDecl *Fn; - }; + Sema &S; + SourceLocation Loc; + unsigned DiagID; + FunctionDecl *Fn; + bool ShowCallStack; // Invariant: At most one of these Optionals has a value. // FIXME: Switch these to a Variant once that exists. 
- llvm::Optional<DiagnosticBuilder> ImmediateDiagBuilder; - llvm::Optional<PartialDiagnosticInfo> PartialDiagInfo; + llvm::Optional<DiagnosticBuilder> ImmediateDiag; + llvm::Optional<PartialDiagnostic> PartialDiag; }; /// Creates a CUDADiagBuilder that emits the diagnostic if the current context Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -488,22 +488,6 @@ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, - unsigned DiagID, FunctionDecl *Fn, - Sema &S) { - switch (K) { - case K_Nop: - break; - case K_Immediate: - ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID)); - break; - case K_Deferred: - assert(Fn && "Must have a function to attach the deferred diag to."); - PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn); - break; - } -} - // In CUDA, there are some constructs which may appear in semantically-valid // code, but trigger errors if we ever generate code for the function in which // they appear. Essentially every construct you're not allowed to use on the @@ -528,6 +512,54 @@ // until we discover that the function is known-emitted, at which point we take // it out of this map and emit the diagnostic. +Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, + unsigned DiagID, FunctionDecl *Fn, + Sema &S) + : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), + ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { + switch (K) { + case K_Nop: + break; + case K_Immediate: + case K_ImmediateWithCallStack: + ImmediateDiag.emplace(S.Diag(Loc, DiagID)); + break; + case K_Deferred: + assert(Fn && "Must have a function to attach the deferred diag to."); + PartialDiag.emplace(S.PDiag(DiagID)); + break; + } +} + +// Print notes showing how we can reach FD starting from an a priori +// known-callable function. 
+static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { + auto FnIt = S.CUDAKnownEmittedFns.find(FD); + while (FnIt != S.CUDAKnownEmittedFns.end()) { + DiagnosticBuilder Builder( + S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); + Builder << FnIt->second.FD; + Builder.setForceEmit(); + + FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); + } +} + +Sema::CUDADiagBuilder::~CUDADiagBuilder() { + if (ImmediateDiag) { + // Emit our diagnostic and, if it was a warning or error, output a callstack + // if Fn isn't a priori known-emitted. + bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( + DiagID, Loc) >= DiagnosticsEngine::Warning; + ImmediateDiag.reset(); // Emit the immediate diag. + if (IsWarningOrError && ShowCallStack) + EmitCallStackNotes(S, Fn); + } else if (PartialDiag) { + assert(ShowCallStack && "Must always show call stack for deferred diags."); + S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); + } +} + // Do we know that we will eventually codegen the given function? static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { // Templates are emitted when they're instantiated. @@ -568,7 +600,7 @@ // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) - ? CUDADiagBuilder::K_Immediate + ? CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; } return CUDADiagBuilder::K_Nop; @@ -596,7 +628,7 @@ return CUDADiagBuilder::K_Nop; return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) - ? CUDADiagBuilder::K_Immediate + ? 
CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; default: return CUDADiagBuilder::K_Nop; } @@ -612,63 +644,84 @@ auto It = S.CUDADeferredDiags.find(FD); if (It == S.CUDADeferredDiags.end()) return; + bool HasWarningOrError = false; for (PartialDiagnosticAt &PDAt : It->second) { const SourceLocation &Loc = PDAt.first; const PartialDiagnostic &PD = PDAt.second; + HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( + PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); Builder.setForceEmit(); PD.Emit(Builder); } S.CUDADeferredDiags.erase(It); + + // FIXME: Should this be called after every warning/error emitted in the loop + // above, instead of just once per function? That would be consistent with + // how we handle immediate errors, but it also seems like a bit much. + if (HasWarningOrError) + EmitCallStackNotes(S, FD); } // Indicate that this function (and thus everything it transtively calls) will // be codegen'ed, and emit any deferred diagnostics on this function and its // (transitive) callees. -static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) { +static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, + FunctionDecl *OrigCallee, SourceLocation OrigLoc) { // Nothing to do if we already know that FD is emitted. - if (IsKnownEmitted(S, OrigCallee)) { + if (IsKnownEmitted(S, OrigCallee)) { assert(!S.CUDACallGraph.count(OrigCallee)); return; } - // We've just discovered that FD is known-emitted. Walk our call graph to see - // what else we can now discover also must be emitted. - llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD}; - llvm::SmallSet<FunctionDecl *, 4> Seen; - Seen.insert(FD); + // We've just discovered that OrigCallee is known-emitted. Walk our call + // graph to see what else we can now discover also must be emitted. 
+ + struct CallInfo { + FunctionDecl *Caller; + FunctionDecl *Callee; + SourceLocation Loc; + }; + llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; + llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; + Seen.insert(OrigCallee); while (!Worklist.empty()) { - FunctionDecl *Caller = Worklist.pop_back_val(); - assert(!IsKnownEmitted(S, Caller) && + CallInfo C = Worklist.pop_back_val(); + assert(!IsKnownEmitted(S, C.Callee) && "Worklist should not contain known-emitted functions."); - S.CUDAKnownEmittedFns.insert(Caller); - EmitDeferredDiags(S, Caller); + S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; + EmitDeferredDiags(S, C.Callee); // If this is a template instantiation, explore its callgraph as well: // Non-dependent calls are part of the template's callgraph, while dependent // calls are part of to the instantiation's call graph. - if (auto *Templ = Caller->getPrimaryTemplate()) { + if (auto *Templ = C.Callee->getPrimaryTemplate()) { FunctionDecl *TemplFD = Templ->getAsFunction(); if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { Seen.insert(TemplFD); - Worklist.push_back(TemplFD); + Worklist.push_back( + {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); } } - // Add all functions called by Caller to our worklist. - auto CGIt = S.CUDACallGraph.find(Caller); + // Add all functions called by Callee to our worklist. 
+ auto CGIt = S.CUDACallGraph.find(C.Callee); if (CGIt == S.CUDACallGraph.end()) continue; - for (FunctionDecl *Callee : CGIt->second) { - if (Seen.count(Callee) || IsKnownEmitted(S, Callee)) + for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc : + CGIt->second) { + FunctionDecl *NewCallee = FDLoc.first; + SourceLocation CallLoc = FDLoc.second; + if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee)) continue; - Seen.insert(Callee); - Worklist.push_back(Callee); + Seen.insert(NewCallee); + Worklist.push_back( + {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc}); } - // Caller is now known-emitted, so we no longer need to maintain its list of - // callees in CUDACallGraph. + // C.Callee is now known-emitted, so we no longer need to maintain its list + // of callees in CUDACallGraph. S.CUDACallGraph.erase(CGIt); } } @@ -686,7 +739,7 @@ // Otherwise, mark the call in our call graph so we can traverse it later. bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); if (CallerKnownEmitted) - MarkKnownEmitted(*this, Callee); + MarkKnownEmitted(*this, Caller, Callee, Loc); else { // If we have // host fn calls kernel fn calls host+device, // the HD function is emitted on both sides of the compilation -- it's // important that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - CUDACallGraph[Caller].insert(Callee); + CUDACallGraph[Caller].insert({Callee, Loc}); } CUDADiagBuilder::Kind DiagKind = [&] { @@ -707,7 +760,7 @@ // If we know the caller will be emitted, we know this wrong-side call // will be emitted, so it's an immediate error. Otherwise, defer the // error until we know the caller is emitted. - return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate + return CallerKnownEmitted ? 
CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; default: return CUDADiagBuilder::K_Nop; } @@ -729,7 +782,8 @@ CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, Caller, *this) << Callee; - return DiagKind != CUDADiagBuilder::K_Immediate; + return DiagKind != CUDADiagBuilder::K_Immediate && + DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { Index: clang/test/SemaCUDA/bad-calls-on-same-line.cu =================================================================== --- clang/test/SemaCUDA/bad-calls-on-same-line.cu +++ clang/test/SemaCUDA/bad-calls-on-same-line.cu @@ -35,5 +35,7 @@ void host_fn() { hd<int>(); hd<double>(); // expected-note {{function template specialization 'hd<double>'}} + // expected-note@-1 {{called by 'host_fn'}} hd<float>(); // expected-note {{function template specialization 'hd<float>'}} + // expected-note@-1 {{called by 'host_fn'}} } Index: clang/test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- clang/test/SemaCUDA/call-device-fn-from-host.cu +++ clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. 
Index: clang/test/SemaCUDA/call-host-fn-from-device.cu =================================================================== --- clang/test/SemaCUDA/call-host-fn-from-device.cu +++ clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \ -// RUN: -emit-llvm -o /dev/null -verify +// RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: clang/test/SemaCUDA/call-stack-for-deferred-err.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/call-stack-for-deferred-err.cu @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +// We should emit an error for hd_fn's use of a VLA. This would have been +// legal if hd_fn were never codegen'ed on the device, so we should also print +// out a callstack showing how we determine that hd_fn is known-emitted. +// +// Compare to no-call-stack-for-deferred-err.cu. 
+ +inline __host__ __device__ void hd_fn(int n); +inline __device__ void device_fn2() { hd_fn(42); } // expected-note {{called by 'device_fn2'}} + +__global__ void kernel() { device_fn2(); } // expected-note {{called by 'kernel'}} + +inline __host__ __device__ void hd_fn(int n) { + int vla[n]; // expected-error {{variable-length array}} +} Index: clang/test/SemaCUDA/exceptions-host-device.cu =================================================================== --- clang/test/SemaCUDA/exceptions-host-device.cu +++ clang/test/SemaCUDA/exceptions-host-device.cu @@ -36,3 +36,6 @@ #endif } __device__ void call_hd3() { hd3(); } +#ifndef HOST +// expected-note@-2 {{called by 'call_hd3'}} +#endif Index: clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +// Here we should dump an error about the VLA in device_fn, but we should not +// print a callstack indicating how device_fn becomes known-emitted, because +// it's an error to use a VLA in any __device__ function, even one that doesn't +// get emitted. 
+ +inline __device__ void device_fn(int n); +inline __device__ void device_fn2() { device_fn(42); } + +__global__ void kernel() { device_fn2(); } + +inline __device__ void device_fn(int n) { + int vla[n]; // expected-error {{variable-length array}} +} Index: clang/test/SemaCUDA/trace-through-global.cu =================================================================== --- clang/test/SemaCUDA/trace-through-global.cu +++ clang/test/SemaCUDA/trace-through-global.cu @@ -35,10 +35,16 @@ template void launch_kernel() { kernel<<<0, 0>>>(T()); - hd1(); - hd3(T()); + + // Notice that these two diagnostics are different: Because the call to hd1 + // is not dependent on T, the call to hd1 comes from 'launch_kernel', while + // the call to hd3, being dependent, comes from 'launch_kernel'. + hd1(); // expected-note {{called by 'launch_kernel'}} + hd3(T()); // expected-note {{called by 'launch_kernel'}} } void host_fn() { launch_kernel(); + // expected-note@-1 {{called by 'host_fn'}} + // expected-note@-2 {{called by 'host_fn'}} }