Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -448,12 +448,6 @@ /// \brief Allocator for partial diagnostics. PartialDiagnostic::StorageAllocator DiagAllocator; - /// Diagnostics that are emitted if and only if the given function is - /// codegen'ed. Access these through FunctionDecl::addDeferredDiag() and - /// FunctionDecl::takeDeferredDiags(). - llvm::DenseMap> - DeferredDiags; - /// \brief The current C++ ABI. std::unique_ptr ABI; CXXABI *createCXXABI(const TargetInfo &T); @@ -604,11 +598,6 @@ return DiagAllocator; } - decltype(DeferredDiags) &getDeferredDiags() { return DeferredDiags; } - const decltype(DeferredDiags) &getDeferredDiags() const { - return DeferredDiags; - } - const TargetInfo &getTargetInfo() const { return *Target; } const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } Index: clang/include/clang/AST/Decl.h =================================================================== --- clang/include/clang/AST/Decl.h +++ clang/include/clang/AST/Decl.h @@ -2271,14 +2271,6 @@ /// returns 0. unsigned getMemoryFunctionKind() const; - /// Add a diagnostic to be emitted if and when this function is codegen'ed. - void addDeferredDiag(PartialDiagnosticAt PD); - - /// Gets this object's list of deferred diagnostics, if there are any. - /// - /// Although this is logically const, it clears our list of deferred diags. - std::vector takeDeferredDiags() const; - // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -9219,6 +9219,30 @@ QualType FieldTy, bool IsMsStruct, Expr *BitWidth, bool *ZeroWidth = nullptr); + /// Diagnostics that are emitted only if we discover that the given function + /// must be codegen'ed. Because handling these correctly adds overhead to + /// compilation, this is currently only enabled for CUDA compilations. + llvm::DenseMap> + CUDADeferredDiags; + + /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a + /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the + /// same deferred diag twice. + llvm::DenseSet LocsWithCUDACallDiags; + + /// The set of CUDA functions that we've discovered must be emitted by tracing + /// the call graph. Functions that we can tell a priori must be emitted + /// aren't added to this set. + llvm::DenseSet CUDAKnownEmittedFns; + + /// A partial call graph maintained during CUDA compilation to support + /// deferred diagnostics. Specifically, functions are only added here if, at + /// the time they're added, they are not known-emitted. As soon as we + /// discover that a function is known-emitted, we remove it and everything it + /// transitively calls from this set and add those functions to + /// CUDAKnownEmittedFns. + llvm::DenseMap> CUDACallGraph; + /// Diagnostic builder for CUDA errors which may or may not be deferred. /// /// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch) @@ -9272,12 +9296,15 @@ private: struct PartialDiagnosticInfo { - PartialDiagnosticInfo(SourceLocation Loc, PartialDiagnostic PD, + PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD, FunctionDecl *Fn) - : Loc(Loc), PD(std::move(PD)), Fn(Fn) {} + : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {} - ~PartialDiagnosticInfo() { Fn->addDeferredDiag({Loc, std::move(PD)}); } + ~PartialDiagnosticInfo() { + S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)}); + } + Sema &S; SourceLocation Loc; PartialDiagnostic PD; FunctionDecl *Fn; @@ -9296,8 +9323,8 @@ /// - If CurContext is a __device__ or __global__ function, emits the /// diagnostics immediately. /// - If CurContext is a __host__ __device__ function and we are compiling for - /// the device, creates a deferred diagnostic which is emitted if and when - /// the function is codegen'ed. + /// the device, creates a diagnostic which is emitted if and when we realize + /// that the function will be codegen'ed. /// /// Example usage: /// @@ -9371,12 +9398,6 @@ void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD, const LookupResult &Previous); -private: - /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a - /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the - /// same deferred diag twice. - llvm::DenseSet LocsWithCUDACallDiags; - public: /// Check whether we're allowed to call Callee from the current context. /// Index: clang/lib/AST/Decl.cpp =================================================================== --- clang/lib/AST/Decl.cpp +++ clang/lib/AST/Decl.cpp @@ -3451,20 +3451,6 @@ return 0; } -void FunctionDecl::addDeferredDiag(PartialDiagnosticAt PD) { - getASTContext().getDeferredDiags()[this].push_back(std::move(PD)); -} - -std::vector FunctionDecl::takeDeferredDiags() const { - auto &DD = getASTContext().getDeferredDiags(); - auto It = DD.find(this); - if (It == DD.end()) - return {}; - auto Ret = std::move(It->second); - DD.erase(It); - return Ret; -} - //===----------------------------------------------------------------------===// // FieldDecl Implementation //===----------------------------------------------------------------------===// Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -490,10 +490,6 @@ /// MDNodes. llvm::DenseMap MetadataIdMap; - /// Diags gathered from FunctionDecl::takeDeferredDiags(). Emitted at the - /// very end of codegen. - std::vector> DeferredDiags; - public: CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, const PreprocessorOptions &ppopts, Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -499,19 +499,6 @@ EmitVersionIdentMetadata(); EmitTargetMetadata(); - - // Emit any deferred diagnostics gathered during codegen. We didn't emit them - // when we first discovered them because that would have halted codegen, - // preventing us from gathering other deferred diags. - for (const PartialDiagnosticAt &DiagAt : DeferredDiags) { - SourceLocation Loc = DiagAt.first; - const PartialDiagnostic &PD = DiagAt.second; - DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID())); - PD.Emit(Builder); - } - // Clear the deferred diags so they don't outlive the ASTContext's - // PartialDiagnostic allocator. - DeferredDiags.clear(); } void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { @@ -2913,37 +2900,6 @@ llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); - // Emit this function's deferred diagnostics, if none of them are errors. If - // any of them are errors, don't codegen the function, but also don't emit any - // of the diagnostics just yet. Emitting an error during codegen stops - // further codegen, and we want to display as many deferred diags as possible. - // We'll emit the now twice-deferred diags at the very end of codegen. - // - // (If a function has both error and non-error diags, we don't emit the - // non-error diags here, because order can be significant, e.g. with notes - // that follow errors.) - auto Diags = D->takeDeferredDiags(); - if (auto *Templ = D->getPrimaryTemplate()) { - auto TemplDiags = Templ->getAsFunction()->takeDeferredDiags(); - Diags.insert(Diags.end(), TemplDiags.begin(), TemplDiags.end()); - } - bool HasError = llvm::any_of(Diags, [this](const PartialDiagnosticAt &PDAt) { - return getDiags().getDiagnosticLevel(PDAt.second.getDiagID(), PDAt.first) >= - DiagnosticsEngine::Error; - }); - if (HasError) { - DeferredDiags.insert(DeferredDiags.end(), - std::make_move_iterator(Diags.begin()), - std::make_move_iterator(Diags.end())); - return; - } - for (PartialDiagnosticAt &PDAt : Diags) { - const SourceLocation &Loc = PDAt.first; - const PartialDiagnostic &PD = PDAt.second; - DiagnosticBuilder Builder(getDiags().Report(Loc, PD.getDiagID())); - PD.Emit(Builder); - } - // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -496,27 +496,84 @@ break; case K_Deferred: assert(Fn && "Must have a function to attach the deferred diag to."); - PartialDiagInfo.emplace(Loc, S.PDiag(DiagID), Fn); + PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn); break; } } +// In CUDA, there are some constructs which may appear in semantically-valid +// code, but trigger errors if we ever generate code for the function in which +// they appear. Essentially every construct you're not allowed to use on the +// device falls into this category, because you are allowed to use these +// constructs in a __host__ __device__ function, but only if that function is +// never codegen'ed on the device. +// +// To handle semantic checking for these constructs, we keep track of the set of +// functions we know will be emitted, either because we could tell a priori that +// they would be emitted, or because they were transitively called by a +// known-emitted function. +// +// We also keep a partial call graph of which not-known-emitted functions call +// which other not-known-emitted functions. +// +// When we see something which is illegal if the current function is emitted +// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or +// CheckCUDACall), we first check if the current function is known-emitted. If +// so, we immediately output the diagnostic. +// +// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags +// until we discover that the function is known-emitted, at which point we take +// it out of this map and emit the diagnostic. + +// Do we know that we will eventually codegen the given function? +static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { + // Templates are emitted when they're instantiated. + if (FD->isDependentContext()) + return false; + + // When compiling for device, host functions are never emitted. Similarly, + // when compiling for host, device and global functions are never emitted. + // (Technically, we do emit a host-side stub for global functions, but this + // doesn't count for our purposes here.) + Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); + if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) + return false; + if (!S.getLangOpts().CUDAIsDevice && + (T == Sema::CFT_Device || T == Sema::CFT_Global)) + return false; + + // Externally-visible and similar functions are always emitted. + if (S.getASTContext().GetGVALinkageForFunction(FD) > GVA_DiscardableODR) + return true; + + // Otherwise, the function is known-emitted if it's in our set of + // known-emitted functions. + return S.CUDAKnownEmittedFns.count(FD) > 0; +} + Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); - CUDADiagBuilder::Kind DiagKind; - switch (CurrentCUDATarget()) { - case CFT_Global: - case CFT_Device: - DiagKind = CUDADiagBuilder::K_Immediate; - break; - case CFT_HostDevice: - DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Deferred - : CUDADiagBuilder::K_Nop; - break; - default: - DiagKind = CUDADiagBuilder::K_Nop; - } + CUDADiagBuilder::Kind DiagKind = [&] { + switch (CurrentCUDATarget()) { + case CFT_Global: + case CFT_Device: + return CUDADiagBuilder::K_Immediate; + case CFT_HostDevice: + // An HD function counts as host code if we're compiling for host, and + // device code if we're compiling for device. Defer any errors in device + // mode until the function is known-emitted. + if (getLangOpts().CUDAIsDevice) { + return IsKnownEmitted(*this, dyn_cast(CurContext)) + ? CUDADiagBuilder::K_Immediate + : CUDADiagBuilder::K_Deferred; + } + return CUDADiagBuilder::K_Nop; + + default: + return CUDADiagBuilder::K_Nop; + } + }(); return CUDADiagBuilder(DiagKind, Loc, DiagID, dyn_cast(CurContext), *this); } @@ -524,41 +581,119 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); - CUDADiagBuilder::Kind DiagKind; - switch (CurrentCUDATarget()) { - case CFT_Host: - DiagKind = CUDADiagBuilder::K_Immediate; - break; - case CFT_HostDevice: - DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Nop - : CUDADiagBuilder::K_Deferred; - break; - default: - DiagKind = CUDADiagBuilder::K_Nop; - } + CUDADiagBuilder::Kind DiagKind = [&] { + switch (CurrentCUDATarget()) { + case CFT_Host: + return CUDADiagBuilder::K_Immediate; + case CFT_HostDevice: + // An HD function counts as host code if we're compiling for host, and + // device code if we're compiling for device. Defer any errors in device + // mode until the function is known-emitted. + if (getLangOpts().CUDAIsDevice) + return CUDADiagBuilder::K_Nop; + + return IsKnownEmitted(*this, dyn_cast(CurContext)) + ? CUDADiagBuilder::K_Immediate + : CUDADiagBuilder::K_Deferred; + default: + return CUDADiagBuilder::K_Nop; + } + }(); return CUDADiagBuilder(DiagKind, Loc, DiagID, dyn_cast(CurContext), *this); } +// Emit any deferred diagnostics for FD and erase them from the map in which +// they're stored. +static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) { + auto It = S.CUDADeferredDiags.find(FD); + if (It == S.CUDADeferredDiags.end()) + return; + for (PartialDiagnosticAt &PDAt : It->second) { + const SourceLocation &Loc = PDAt.first; + const PartialDiagnostic &PD = PDAt.second; + DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); + Builder.setForceEmit(); + PD.Emit(Builder); + } + S.CUDADeferredDiags.erase(It); +} + +// Indicate that this function (and thus everything it transtively calls) will +// be codegen'ed, and emit any deferred diagnostics on this function and its +// (transitive) callees. +static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) { + // Nothing to do if we already know that FD is emitted. + if (IsKnownEmitted(S, FD)) { + assert(!S.CUDACallGraph.count(FD)); + return; + } + + // We've just discovered that FD is known-emitted. Walk our call graph to see + // what else we can now discover also must be emitted. + llvm::SmallVector Worklist = {FD}; + llvm::SmallSet Seen; + Seen.insert(FD); + while (!Worklist.empty()) { + FunctionDecl *Caller = Worklist.pop_back_val(); + assert(!IsKnownEmitted(S, Caller) && + "Worklist should not contain known-emitted functions."); + S.CUDAKnownEmittedFns.insert(Caller); + EmitDeferredDiags(S, Caller); + + // Deferred diags are often emitted on the template itself, so emit those as + // well. + if (auto *Templ = Caller->getPrimaryTemplate()) + EmitDeferredDiags(S, Templ->getAsFunction()); + + // Add all functions called by Caller to our worklist. + auto CGIt = S.CUDACallGraph.find(Caller); + if (CGIt == S.CUDACallGraph.end()) + continue; + + for (FunctionDecl *Callee : CGIt->second) { + if (Seen.count(Callee) || IsKnownEmitted(S, Callee)) + continue; + Seen.insert(Callee); + Worklist.push_back(Callee); + } + + // Caller is now known-emitted, so we no longer need to maintain its list of + // callees in CUDACallGraph. + S.CUDACallGraph.erase(CGIt); + } +} + bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); assert(Callee && "Callee may not be null."); + // FIXME: Is bailing out early correct here? Should we instead assume that + // the caller is a global initializer? FunctionDecl *Caller = dyn_cast(CurContext); if (!Caller) return true; - CUDADiagBuilder::Kind DiagKind; - switch (IdentifyCUDAPreference(Caller, Callee)) { - case CFP_Never: - DiagKind = CUDADiagBuilder::K_Immediate; - break; - case CFP_WrongSide: - assert(Caller && "WrongSide calls require a non-null caller"); - DiagKind = CUDADiagBuilder::K_Deferred; - break; - default: - DiagKind = CUDADiagBuilder::K_Nop; - } + bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); + if (CallerKnownEmitted) + MarkKnownEmitted(*this, Callee); + else + CUDACallGraph[Caller].insert(Callee); + + CUDADiagBuilder::Kind DiagKind = [&] { + switch (IdentifyCUDAPreference(Caller, Callee)) { + case CFP_Never: + return CUDADiagBuilder::K_Immediate; + case CFP_WrongSide: + assert(Caller && "WrongSide calls require a non-null caller"); + // If we know the caller will be emitted, we know this wrong-side call + // will be emitted, so it's an immediate error. Otherwise, defer the + // error until we know the caller is emitted. + return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate + : CUDADiagBuilder::K_Deferred; + default: + return CUDADiagBuilder::K_Nop; + } + }(); // Avoid emitting this error twice for the same location. Using a hashtable // like this is unfortunate, but because we must continue parsing as normal Index: clang/test/Parser/lambda-attr.cu =================================================================== --- clang/test/Parser/lambda-attr.cu +++ clang/test/Parser/lambda-attr.cu @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcuda-is-device -verify %s __attribute__((device)) void device_fn() {} -__attribute__((device)) void hd_fn() {} +__attribute__((host, device)) void hd_fn() {} __attribute__((device)) void device_attr() { ([]() __attribute__((device)) { device_fn(); })(); Index: clang/test/SemaCUDA/call-host-fn-from-device.cu =================================================================== --- clang/test/SemaCUDA/call-host-fn-from-device.cu +++ clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - -verify +// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \ +// RUN: -emit-llvm -o /dev/null -verify // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: clang/test/SemaCUDA/function-overload.cu =================================================================== --- clang/test/SemaCUDA/function-overload.cu +++ clang/test/SemaCUDA/function-overload.cu @@ -170,11 +170,23 @@ DeviceReturnTy ret_d = d(); DeviceFnPtr fp_cd = cd; DeviceReturnTy ret_cd = cd(); +#if !defined(__CUDA_ARCH__) + // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}} + // expected-error@-5 {{reference to __device__ function 'd' in __host__ __device__ function}} + // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}} + // expected-error@-5 {{reference to __device__ function 'cd' in __host__ __device__ function}} +#endif HostFnPtr fp_h = h; HostReturnTy ret_h = h(); HostFnPtr fp_ch = ch; HostReturnTy ret_ch = ch(); +#if defined(__CUDA_ARCH__) + // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}} + // expected-error@-5 {{reference to __host__ function 'h' in __host__ __device__ function}} + // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}} + // expected-error@-5 {{reference to __host__ function 'ch' in __host__ __device__ function}} +#endif CurrentFnPtr fp_dh = dh; CurrentReturnTy ret_dh = dh(); @@ -308,7 +320,11 @@ // If we have a mix of HD and H-only or D-only candidates in the overload set, // normal C++ overload resolution rules apply first. -template TemplateReturnTy template_vs_hd_function(T arg) { +template TemplateReturnTy template_vs_hd_function(T arg) +#ifdef __CUDA_ARCH__ +//expected-note@-2 {{declared here}} +#endif +{ return TemplateReturnTy(); } __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) { @@ -318,6 +334,9 @@ __host__ __device__ void test_host_device_calls_hd_template() { HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f); TemplateReturnTy ret2 = template_vs_hd_function(1); +#ifdef __CUDA_ARCH__ + // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function' in __host__ __device__ function}} +#endif } __host__ void test_host_calls_hd_template() { @@ -337,14 +356,30 @@ // side of compilation. __device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); } __device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); } +#ifndef __CUDA_ARCH__ + // expected-note@-3 {{'device_only_function' declared here}} + // expected-note@-3 {{'device_only_function' declared here}} +#endif __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); } __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); } +#ifdef __CUDA_ARCH__ + // expected-note@-3 {{'host_only_function' declared here}} + // expected-note@-3 {{'host_only_function' declared here}} +#endif __host__ __device__ void test_host_device_single_side_overloading() { DeviceReturnTy ret1 = device_only_function(1); DeviceReturnTy2 ret2 = device_only_function(1.0f); +#ifndef __CUDA_ARCH__ + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}} +#endif HostReturnTy ret3 = host_only_function(1); HostReturnTy2 ret4 = host_only_function(1.0f); +#ifdef __CUDA_ARCH__ + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} + // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}} +#endif } // Verify that we allow overloading function templates. Index: clang/test/SemaCUDA/method-target.cu =================================================================== --- clang/test/SemaCUDA/method-target.cu +++ clang/test/SemaCUDA/method-target.cu @@ -29,7 +29,7 @@ // Test 3: device method called from host function struct S3 { - __device__ void method() {} // expected-note {{'method' declared here}}; + __device__ void method() {} // expected-note {{'method' declared here}} }; void foo3(S3& s) { @@ -40,11 +40,11 @@ // Test 4: device method called from host&device function struct S4 { - __device__ void method() {} + __device__ void method() {} // expected-note {{'method' declared here}} }; __host__ __device__ void foo4(S4& s) { - s.method(); + s.method(); // expected-error {{reference to __device__ function 'method' in __host__ __device__ function}} } //------------------------------------------------------------------------------