Index: cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticParseKinds.td @@ -1026,6 +1026,12 @@ def warn_cuda_attr_lambda_position : Warning< "nvcc does not allow '__%0__' to appear after '()' in lambdas">, InGroup; +def warn_pragma_force_cuda_host_device_bad_arg : Warning< + "incorrect use of #pragma clang force_cuda_host_device begin|end">, + InGroup; +def err_pragma_cannot_end_force_cuda_host_device : Error< + "force_cuda_host_device end pragma without matching " + "force_cuda_host_device begin">; } // end of Parse Issue category. let CategoryName = "Modules Issue" in { Index: cfe/trunk/include/clang/Parse/Parser.h =================================================================== --- cfe/trunk/include/clang/Parse/Parser.h +++ cfe/trunk/include/clang/Parse/Parser.h @@ -173,6 +173,7 @@ std::unique_ptr MSSection; std::unique_ptr MSRuntimeChecks; std::unique_ptr MSIntrinsic; + std::unique_ptr CUDAForceHostDeviceHandler; std::unique_ptr OptimizeHandler; std::unique_ptr LoopHintHandler; std::unique_ptr UnrollHintHandler; Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -9219,6 +9219,20 @@ QualType FieldTy, bool IsMsStruct, Expr *BitWidth, bool *ZeroWidth = nullptr); +private: + unsigned ForceCUDAHostDeviceDepth = 0; + +public: + /// Increments our count of the number of times we've seen a pragma forcing + /// functions to be __host__ __device__. So long as this count is greater + /// than zero, all functions encountered will be __host__ __device__. + void PushForceCUDAHostDevice(); + + /// Decrements our count of the number of times we've seen a pragma forcing + /// functions to be __host__ __device__. Returns false if the count is 0 + /// before incrementing, so you can emit an error. + bool PopForceCUDAHostDevice(); + enum CUDAFunctionTarget { CFT_Device, CFT_Global, Index: cfe/trunk/include/clang/Serialization/ASTBitCodes.h =================================================================== --- cfe/trunk/include/clang/Serialization/ASTBitCodes.h +++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h @@ -580,7 +580,11 @@ MSSTRUCT_PRAGMA_OPTIONS = 55, /// \brief Record code for \#pragma ms_struct options. - POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56 + POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56, + + /// \brief Number of unmatched #pragma clang cuda_force_host_device begin + /// directives we've seen. + CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH = 57, }; /// \brief Record types used within a source manager block. Index: cfe/trunk/include/clang/Serialization/ASTReader.h =================================================================== --- cfe/trunk/include/clang/Serialization/ASTReader.h +++ cfe/trunk/include/clang/Serialization/ASTReader.h @@ -772,6 +772,10 @@ /// Sema tracks these to emit warnings. SmallVector UnusedLocalTypedefNameCandidates; + /// \brief Our current depth in #pragma cuda force_host_device begin/end + /// macros. + unsigned ForceCUDAHostDeviceDepth = 0; + /// \brief The IDs of the declarations Sema stores directly. /// /// Sema tracks a few important decls, such as namespace std, directly. Index: cfe/trunk/include/clang/Serialization/ASTWriter.h =================================================================== --- cfe/trunk/include/clang/Serialization/ASTWriter.h +++ cfe/trunk/include/clang/Serialization/ASTWriter.h @@ -459,6 +459,7 @@ void WriteDeclContextVisibleUpdate(const DeclContext *DC); void WriteFPPragmaOptions(const FPOptions &Opts); void WriteOpenCLExtensions(Sema &SemaRef); + void WriteCUDAPragmas(Sema &SemaRef); void WriteObjCCategories(); void WriteLateParsedTemplates(Sema &SemaRef); void WriteOptimizePragmaOptions(Sema &SemaRef); Index: cfe/trunk/lib/Parse/ParsePragma.cpp =================================================================== --- cfe/trunk/lib/Parse/ParsePragma.cpp +++ cfe/trunk/lib/Parse/ParsePragma.cpp @@ -167,6 +167,16 @@ Token &FirstToken) override; }; +struct PragmaForceCUDAHostDeviceHandler : public PragmaHandler { + PragmaForceCUDAHostDeviceHandler(Sema &Actions) + : PragmaHandler("force_cuda_host_device"), Actions(Actions) {} + void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + Token &FirstToken) override; + +private: + Sema &Actions; +}; + } // end namespace void Parser::initializePragmaHandlers() { @@ -239,6 +249,12 @@ PP.AddPragmaHandler(MSIntrinsic.get()); } + if (getLangOpts().CUDA) { + CUDAForceHostDeviceHandler.reset( + new PragmaForceCUDAHostDeviceHandler(Actions)); + PP.AddPragmaHandler("clang", CUDAForceHostDeviceHandler.get()); + } + OptimizeHandler.reset(new PragmaOptimizeHandler(Actions)); PP.AddPragmaHandler("clang", OptimizeHandler.get()); @@ -309,6 +325,11 @@ MSIntrinsic.reset(); } + if (getLangOpts().CUDA) { + PP.RemovePragmaHandler("clang", CUDAForceHostDeviceHandler.get()); + CUDAForceHostDeviceHandler.reset(); + } + PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); @@ -2187,3 +2208,26 @@ PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) << "intrinsic"; } +void PragmaForceCUDAHostDeviceHandler::HandlePragma( + Preprocessor &PP, PragmaIntroducerKind Introducer, Token &Tok) { + Token FirstTok = Tok; + + PP.Lex(Tok); + IdentifierInfo *Info = Tok.getIdentifierInfo(); + if (!Info || (!Info->isStr("begin") && !Info->isStr("end"))) { + PP.Diag(FirstTok.getLocation(), + diag::warn_pragma_force_cuda_host_device_bad_arg); + return; + } + + if (Info->isStr("begin")) + Actions.PushForceCUDAHostDevice(); + else if (!Actions.PopForceCUDAHostDevice()) + PP.Diag(FirstTok.getLocation(), + diag::err_pragma_cannot_end_force_cuda_host_device); + + PP.Lex(Tok); + if (!Tok.is(tok::eod)) + PP.Diag(FirstTok.getLocation(), + diag::warn_pragma_force_cuda_host_device_bad_arg); +} Index: cfe/trunk/lib/Sema/SemaCUDA.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp +++ cfe/trunk/lib/Sema/SemaCUDA.cpp @@ -23,6 +23,19 @@ #include "llvm/ADT/SmallVector.h" using namespace clang; +void Sema::PushForceCUDAHostDevice() { + assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + ForceCUDAHostDeviceDepth++; +} + +bool Sema::PopForceCUDAHostDevice() { + assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + if (ForceCUDAHostDeviceDepth == 0) + return false; + ForceCUDAHostDeviceDepth--; + return true; +} + ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, SourceLocation GGGLoc) { @@ -441,9 +454,23 @@ // * a __device__ function with this signature was already declared, in which // case in which case we output an error, unless the __device__ decl is in a // system header, in which case we leave the constexpr function unattributed. +// +// In addition, all function decls are treated as __host__ __device__ when +// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a +// #pragma clang force_cuda_host_device_begin/end +// pair). void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD, const LookupResult &Previous) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); + + if (ForceCUDAHostDeviceDepth > 0) { + if (!NewD->hasAttr()) + NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); + if (!NewD->hasAttr()) + NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); + return; + } + if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || NewD->isVariadic() || NewD->hasAttr() || NewD->hasAttr() || NewD->hasAttr()) Index: cfe/trunk/lib/Serialization/ASTReader.cpp =================================================================== --- cfe/trunk/lib/Serialization/ASTReader.cpp +++ cfe/trunk/lib/Serialization/ASTReader.cpp @@ -3275,6 +3275,14 @@ UnusedLocalTypedefNameCandidates.push_back( getGlobalDeclID(F, Record[I])); break; + + case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH: + if (Record.size() != 1) { + Error("invalid cuda pragma options record"); + return Failure; + } + ForceCUDAHostDeviceDepth = Record[0]; + break; } } } @@ -7128,6 +7136,7 @@ PragmaMSPointersToMembersState, PointersToMembersPragmaLocation); } + SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth; } IdentifierInfo *ASTReader::get(StringRef Name) { Index: cfe/trunk/lib/Serialization/ASTWriter.cpp =================================================================== --- cfe/trunk/lib/Serialization/ASTWriter.cpp +++ cfe/trunk/lib/Serialization/ASTWriter.cpp @@ -1069,6 +1069,7 @@ RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS); RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES); RECORD(DELETE_EXPRS_TO_ANALYZE); + RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH); // SourceManager Block. BLOCK(SOURCE_MANAGER_BLOCK); @@ -3942,6 +3943,13 @@ Stream.EmitRecord(OPENCL_EXTENSIONS, Record); } +void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) { + if (SemaRef.ForceCUDAHostDeviceDepth > 0) { + RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth}; + Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record); + } +} + void ASTWriter::WriteObjCCategories() { SmallVector CategoriesMap; RecordData Categories; @@ -4619,6 +4627,7 @@ WriteIdentifierTable(PP, SemaRef.IdResolver, isModule); WriteFPPragmaOptions(SemaRef.getFPOptions()); WriteOpenCLExtensions(SemaRef); + WriteCUDAPragmas(SemaRef); WritePragmaDiagnosticMappings(Context.getDiagnostics(), isModule); // If we're emitting a module, write out the submodule information. Index: cfe/trunk/test/PCH/pragma-cuda-force-host-device.cu =================================================================== --- cfe/trunk/test/PCH/pragma-cuda-force-host-device.cu +++ cfe/trunk/test/PCH/pragma-cuda-force-host-device.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -emit-pch %s -o %t +// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -include-pch %t -S -o /dev/null %s + +#ifndef HEADER +#define HEADER + +#pragma clang force_cuda_host_device begin +#pragma clang force_cuda_host_device begin +#pragma clang force_cuda_host_device end + +void hd1() {} + +#else + +void hd2() {} + +#pragma clang force_cuda_host_device end + +void host_only() {} + +__attribute__((device)) void device() { + hd1(); + hd2(); + host_only(); // expected-error {{no matching function for call}} +} + +#endif Index: cfe/trunk/test/Parser/cuda-force-host-device-templates.cu =================================================================== --- cfe/trunk/test/Parser/cuda-force-host-device-templates.cu +++ cfe/trunk/test/Parser/cuda-force-host-device-templates.cu @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++14 -S -verify -fcuda-is-device %s -o /dev/null + +// Check how the force_cuda_host_device pragma interacts with template +// instantiations. The errors here are emitted at codegen, so we can't do +// -fsyntax-only. + +template +auto foo() { // expected-note {{declared here}} + return T(); +} + +template +struct X { + void foo(); // expected-note {{declared here}} +}; + +#pragma clang force_cuda_host_device begin +__attribute__((host)) __attribute__((device)) void test() { + int n = foo(); // expected-error {{reference to __host__ function 'foo'}} + X().foo(); // expected-error {{reference to __host__ function 'foo'}} +} +#pragma clang force_cuda_host_device end + +// Same thing as above, but within a force_cuda_host_device block without a +// corresponding end. + +template +T bar() { // expected-note {{declared here}} + return T(); +} + +template +struct Y { + void bar(); // expected-note {{declared here}} +}; + +#pragma clang force_cuda_host_device begin +__attribute__((host)) __attribute__((device)) void test2() { + int n = bar(); // expected-error {{reference to __host__ function 'bar'}} + Y().bar(); // expected-error {{reference to __host__ function 'bar'}} +} Index: cfe/trunk/test/Parser/cuda-force-host-device.cu =================================================================== --- cfe/trunk/test/Parser/cuda-force-host-device.cu +++ cfe/trunk/test/Parser/cuda-force-host-device.cu @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Check the force_cuda_host_device pragma. + +#pragma clang force_cuda_host_device begin +void f(); +#pragma clang force_cuda_host_device begin +void g(); +#pragma clang force_cuda_host_device end +void h(); +#pragma clang force_cuda_host_device end + +void i(); // expected-note {{not viable}} + +void host() { + f(); + g(); + h(); + i(); +} + +__attribute__((device)) void device() { + f(); + g(); + h(); + i(); // expected-error {{no matching function}} +} + +#pragma clang force_cuda_host_device foo +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}} + +#pragma clang force_cuda_host_device +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}} + +#pragma clang force_cuda_host_device begin foo +// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}}