Index: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td @@ -6734,7 +6734,7 @@ "conflicting __device__ function declared here">; def err_cuda_device_exceptions : Error< "cannot use '%0' in " - "%select{__device__|__global__|__host__|__host__ __device__}1 function %2">; + "%select{__device__|__global__|__host__|__host__ __device__}1 function">; def err_dynamic_var_init : Error< "dynamic initialization is not supported for " "__device__, __constant__, and __shared__ variables.">; Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -9245,6 +9245,100 @@ /// before incrementing, so you can emit an error. bool PopForceCUDAHostDevice(); + /// Diagnostic builder for CUDA errors which may or may not be deferred. + /// + /// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch) + /// which are not allowed to appear inside __device__ functions and are + /// allowed to appear in __host__ __device__ functions only if the host+device + /// function is never codegen'ed. + /// + /// To handle this, we use the notion of "deferred diagnostics", where we + /// attach a diagnostic to a FunctionDecl that's emitted iff it's codegen'ed. + /// + /// This class lets you emit either a regular diagnostic, a deferred + /// diagnostic, or no diagnostic at all, according to an argument you pass to + /// its constructor, thus simplifying the process of creating these "maybe + /// deferred" diagnostics. + class CUDADiagBuilder { + public: + enum Kind { + /// Emit no diagnostics. + K_Nop, + /// Emit the diagnostic immediately (i.e., behave like Sema::Diag()). + K_Immediate, + /// Create a deferred diagnostic, which is emitted only if the function + /// it's attached to is codegen'ed. + K_Deferred + }; + + CUDADiagBuilder(Kind K, SourceLocation Loc, unsigned DiagID, + FunctionDecl *Fn, Sema &S); + + /// Convertible to bool: True if we immediately emitted an error, false if + /// we didn't emit an error or we created a deferred error. + /// + /// Example usage: + /// + /// if (CUDADiagBuilder(...) << foo << bar) + /// return ExprError(); + /// + /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably + /// want to use these instead of creating a CUDADiagBuilder yourself. + operator bool() const { return ImmediateDiagBuilder.hasValue(); } + + template + friend const CUDADiagBuilder &operator<<(const CUDADiagBuilder &Diag, + const T &Value) { + if (Diag.ImmediateDiagBuilder.hasValue()) + *Diag.ImmediateDiagBuilder << Value; + else if (Diag.PartialDiagInfo.hasValue()) + Diag.PartialDiagInfo->PD << Value; + return Diag; + } + + private: + struct PartialDiagnosticInfo { + PartialDiagnosticInfo(SourceLocation Loc, PartialDiagnostic PD, + FunctionDecl *Fn) + : Loc(Loc), PD(std::move(PD)), Fn(Fn) {} + + ~PartialDiagnosticInfo() { Fn->addDeferredDiag({Loc, std::move(PD)}); } + + SourceLocation Loc; + PartialDiagnostic PD; + FunctionDecl *Fn; + }; + + // Invariant: At most one of these Optionals has a value. + // FIXME: Switch these to a Variant once that exists. + llvm::Optional ImmediateDiagBuilder; + llvm::Optional PartialDiagInfo; + }; + + /// Creates a CUDADiagBuilder that emits the diagnostic if the current context + /// is "used as device code". + /// + /// - If CurContext is a __host__ function, does not emit any diagnostics. + /// - If CurContext is a __device__ or __global__ function, emits the + /// diagnostics immediately. + /// - If CurContext is a __host__ __device__ function and we are compiling for + /// the device, creates a deferred diagnostic which is emitted if and when + /// the function is codegen'ed. + /// + /// Example usage: + /// + /// // Variable-length arrays are not allowed in CUDA device code. + /// if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget()) + /// return ExprError(); + /// // Otherwise, continue parsing as normal. + CUDADiagBuilder CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); + + /// Creates a CUDADiagBuilder that emits the diagnostic if the current context + /// is "used as host code". + /// + /// Same as CUDADiagIfDeviceCode, with "host" and "device" switched. + CUDADiagBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID); + enum CUDAFunctionTarget { CFT_Device, CFT_Global, @@ -9253,8 +9347,18 @@ CFT_InvalidTarget }; + /// Determines whether the given function is a CUDA device/host/kernel/etc. + /// function. + /// + /// Use this rather than examining the function's attributes yourself -- you + /// will get it wrong. Returns CFT_Host if D is null. CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D); + /// Gets the CUDA target for the current context. + CUDAFunctionTarget CurrentCUDATarget() { + return IdentifyCUDATarget(dyn_cast(CurContext)); + } + // CUDA function call preference. Must be ordered numerically from // worst to best. enum CUDAFunctionPreference { @@ -9295,9 +9399,9 @@ private: /// Raw encodings of SourceLocations for which CheckCUDACall has emitted a - /// deferred "bad call" diagnostic. We use this to avoid emitting the same - /// deferred diag twice. - llvm::DenseSet LocsWithCUDACallDeferredDiags; + /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the + /// same deferred diag twice. + llvm::DenseSet LocsWithCUDACallDiags; public: /// Check whether we're allowed to call Callee from the current context. @@ -9316,21 +9420,6 @@ /// - Otherwise, returns true without emitting any diagnostics. bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee); - /// Check whether a 'try' or 'throw' expression is allowed within the current - /// context, and raise an error or create a deferred error, as appropriate. - /// - /// 'try' and 'throw' are never allowed in CUDA __device__ functions, and are - /// allowed in __host__ __device__ functions only if those functions are never - /// codegen'ed for the device. - /// - /// ExprTy should be the string "try" or "throw", as appropriate. - bool CheckCUDAExceptionExpr(SourceLocation Loc, StringRef ExprTy); - - /// Check whether it's legal for us to create a variable-length array in the - /// current context. Returns true if the VLA is OK; returns false and emits - /// an error otherwise. - bool CheckCUDAVLA(SourceLocation Loc); - /// Set __device__ or __host__ __device__ attributes on the given lambda /// operator() method. /// Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -18,6 +18,7 @@ #include "clang/Sema/Lookup.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" +#include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -55,6 +56,10 @@ /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { + // Code that lives outside a function is run on the host. + if (D == nullptr) + return CFT_Host; + if (D->hasAttr()) return CFT_InvalidTarget; @@ -108,9 +113,8 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, const FunctionDecl *Callee) { assert(Callee && "Callee must be valid."); + CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); - CUDAFunctionTarget CallerTarget = - (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host; // If one of the targets is invalid, the check always fails, no matter what // the other target is. @@ -484,88 +488,95 @@ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { - assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); - assert(Callee && "Callee may not be null."); - FunctionDecl *Caller = dyn_cast(CurContext); - if (!Caller) - return true; - - Sema::CUDAFunctionPreference Pref = IdentifyCUDAPreference(Caller, Callee); - if (Pref == Sema::CFP_Never) { - Diag(Loc, diag::err_ref_bad_target) << IdentifyCUDATarget(Callee) << Callee - << IdentifyCUDATarget(Caller); - Diag(Callee->getLocation(), diag::note_previous_decl) << Callee; - return false; +Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, + unsigned DiagID, FunctionDecl *Fn, + Sema &S) { + switch (K) { + case K_Nop: + break; + case K_Immediate: + ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID)); + break; + case K_Deferred: + assert(Fn && "Must have a function to attach the deferred diag to."); + PartialDiagInfo.emplace(Loc, S.PDiag(DiagID), Fn); + break; } +} - // Insert into LocsWithCUDADeferredDiags to avoid emitting duplicate deferred - // diagnostics for the same location. Duplicate deferred diags are otherwise - // tricky to avoid, because, unlike with regular errors, sema checking - // proceeds unhindered when we omit a deferred diagnostic. - if (Pref == Sema::CFP_WrongSide && - LocsWithCUDACallDeferredDiags.insert(Loc.getRawEncoding()).second) { - // We have to do this odd dance to create our PartialDiagnostic because we - // want its storage to be allocated with operator new, not in an arena. - PartialDiagnostic ErrPD{PartialDiagnostic::NullDiagnostic()}; - ErrPD.Reset(diag::err_ref_bad_target); - ErrPD << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); - Caller->addDeferredDiag({Loc, std::move(ErrPD)}); - - PartialDiagnostic NotePD{PartialDiagnostic::NullDiagnostic()}; - NotePD.Reset(diag::note_previous_decl); - NotePD << Callee; - Caller->addDeferredDiag({Callee->getLocation(), std::move(NotePD)}); - - // This is not immediately an error, so return true. The deferred errors - // will be emitted if and when Caller is codegen'ed. - return true; +Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, + unsigned DiagID) { + assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + CUDADiagBuilder::Kind DiagKind; + switch (CurrentCUDATarget()) { + case CFT_Global: + case CFT_Device: + DiagKind = CUDADiagBuilder::K_Immediate; + break; + case CFT_HostDevice: + DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Deferred + : CUDADiagBuilder::K_Nop; + break; + default: + DiagKind = CUDADiagBuilder::K_Nop; } - return true; + return CUDADiagBuilder(DiagKind, Loc, DiagID, + dyn_cast(CurContext), *this); } -bool Sema::CheckCUDAExceptionExpr(SourceLocation Loc, StringRef ExprTy) { +Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, + unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); - FunctionDecl *CurFn = dyn_cast(CurContext); - if (!CurFn) - return true; - CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); - - // Raise an error immediately if this is a __global__ or __device__ function. - // If it's a __host__ __device__ function, enqueue a deferred error which will - // be emitted if the function is codegen'ed for device. - if (Target == CFT_Global || Target == CFT_Device) { - Diag(Loc, diag::err_cuda_device_exceptions) << ExprTy << Target << CurFn; - return false; - } - if (Target == CFT_HostDevice && getLangOpts().CUDAIsDevice) { - PartialDiagnostic ErrPD{PartialDiagnostic::NullDiagnostic()}; - ErrPD.Reset(diag::err_cuda_device_exceptions); - ErrPD << ExprTy << Target << CurFn; - CurFn->addDeferredDiag({Loc, std::move(ErrPD)}); - return false; + CUDADiagBuilder::Kind DiagKind; + switch (CurrentCUDATarget()) { + case CFT_Host: + DiagKind = CUDADiagBuilder::K_Immediate; + break; + case CFT_HostDevice: + DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Nop + : CUDADiagBuilder::K_Deferred; + break; + default: + DiagKind = CUDADiagBuilder::K_Nop; } - return true; + return CUDADiagBuilder(DiagKind, Loc, DiagID, + dyn_cast(CurContext), *this); } -bool Sema::CheckCUDAVLA(SourceLocation Loc) { +bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); - FunctionDecl *CurFn = dyn_cast(CurContext); - if (!CurFn) + assert(Callee && "Callee may not be null."); + FunctionDecl *Caller = dyn_cast(CurContext); + if (!Caller) return true; - CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); - if (Target == CFT_Global || Target == CFT_Device) { - Diag(Loc, diag::err_cuda_vla) << Target; - return false; - } - if (Target == CFT_HostDevice && getLangOpts().CUDAIsDevice) { - PartialDiagnostic ErrPD{PartialDiagnostic::NullDiagnostic()}; - ErrPD.Reset(diag::err_cuda_vla); - ErrPD << Target; - CurFn->addDeferredDiag({Loc, std::move(ErrPD)}); - return false; - } - return true; + + CUDADiagBuilder::Kind DiagKind; + switch (IdentifyCUDAPreference(Caller, Callee)) { + case CFP_Never: + DiagKind = CUDADiagBuilder::K_Immediate; + break; + case CFP_WrongSide: + assert(Caller && "WrongSide calls require a non-null caller"); + DiagKind = CUDADiagBuilder::K_Deferred; + break; + default: + DiagKind = CUDADiagBuilder::K_Nop; + } + + // Avoid emitting this error twice for the same location. Using a hashtable + // like this is unfortunate, but because we must continue parsing as normal + // after encountering a deferred error, it's otherwise very tricky for us to + // ensure that we only emit this deferred error once. + if (!LocsWithCUDACallDiags.insert(Loc.getRawEncoding()).second) + return true; + + bool IsImmediateErr = + CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) + << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); + CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, + Caller, *this) + << Callee; + return !IsImmediateErr; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { Index: cfe/trunk/lib/Sema/SemaExprCXX.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaExprCXX.cpp +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp @@ -685,7 +685,8 @@ // Exceptions aren't allowed in CUDA device code. if (getLangOpts().CUDA) - CheckCUDAExceptionExpr(OpLoc, "throw"); + CUDADiagIfDeviceCode(OpLoc, diag::err_cuda_device_exceptions) + << "throw" << CurrentCUDATarget(); if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope()) Diag(OpLoc, diag::err_omp_simd_region_cannot_use_stmt) << "throw"; Index: cfe/trunk/lib/Sema/SemaStmt.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaStmt.cpp +++ cfe/trunk/lib/Sema/SemaStmt.cpp @@ -3648,7 +3648,8 @@ // Exceptions aren't allowed in CUDA device code. if (getLangOpts().CUDA) - CheckCUDAExceptionExpr(TryLoc, "try"); + CUDADiagIfDeviceCode(TryLoc, diag::err_cuda_device_exceptions) + << "try" << CurrentCUDATarget(); if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope()) Diag(TryLoc, diag::err_omp_simd_region_cannot_use_stmt) << "try"; Index: cfe/trunk/lib/Sema/SemaType.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaType.cpp +++ cfe/trunk/lib/Sema/SemaType.cpp @@ -2249,8 +2249,8 @@ return QualType(); } // CUDA device code doesn't support VLAs. - if (getLangOpts().CUDA && T->isVariableArrayType() && !CheckCUDAVLA(Loc)) - return QualType(); + if (getLangOpts().CUDA && T->isVariableArrayType()) + CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget(); // If this is not C99, extwarn about VLA's and C99 array size modifiers. if (!getLangOpts().C99) { Index: cfe/trunk/test/SemaCUDA/exceptions-host-device.cu =================================================================== --- cfe/trunk/test/SemaCUDA/exceptions-host-device.cu +++ cfe/trunk/test/SemaCUDA/exceptions-host-device.cu @@ -14,8 +14,8 @@ throw NULL; try {} catch(void*) {} #ifndef HOST - // expected-error@-3 {{cannot use 'throw' in __host__ __device__ function 'hd1'}} - // expected-error@-3 {{cannot use 'try' in __host__ __device__ function 'hd1'}} + // expected-error@-3 {{cannot use 'throw' in __host__ __device__ function}} + // expected-error@-3 {{cannot use 'try' in __host__ __device__ function}} #endif } @@ -31,8 +31,8 @@ throw NULL; try {} catch(void*) {} #ifndef HOST - // expected-error@-3 {{cannot use 'throw' in __host__ __device__ function 'hd3'}} - // expected-error@-3 {{cannot use 'try' in __host__ __device__ function 'hd3'}} + // expected-error@-3 {{cannot use 'throw' in __host__ __device__ function}} + // expected-error@-3 {{cannot use 'try' in __host__ __device__ function}} #endif } __device__ void call_hd3() { hd3(); } Index: cfe/trunk/test/SemaCUDA/exceptions.cu =================================================================== --- cfe/trunk/test/SemaCUDA/exceptions.cu +++ cfe/trunk/test/SemaCUDA/exceptions.cu @@ -9,13 +9,13 @@ } __device__ void device() { throw NULL; - // expected-error@-1 {{cannot use 'throw' in __device__ function 'device'}} + // expected-error@-1 {{cannot use 'throw' in __device__ function}} try {} catch(void*) {} - // expected-error@-1 {{cannot use 'try' in __device__ function 'device'}} + // expected-error@-1 {{cannot use 'try' in __device__ function}} } __global__ void kernel() { throw NULL; - // expected-error@-1 {{cannot use 'throw' in __global__ function 'kernel'}} + // expected-error@-1 {{cannot use 'throw' in __global__ function}} try {} catch(void*) {} - // expected-error@-1 {{cannot use 'try' in __global__ function 'kernel'}} + // expected-error@-1 {{cannot use 'try' in __global__ function}} }