diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -385,6 +385,7 @@ bool atTopLevel() { return S <= 0; } bool afterImportSeq() { return S == AfterImportSeq; } + bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } private: State S; @@ -397,6 +398,67 @@ /// Our current position within a C++20 import-seq. ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq; + /// Track whether we are in a Global Module Fragment + class TrackGMF { + public: + enum GMFState : int { + GMFActive = 1, + MaybeGMF = 0, + BeforeGMFIntroducer = -1, + GMFAbsentOrEnded = -2, + }; + + TrackGMF(GMFState S) : S(S) {} + + /// Saw a semicolon. + void handleSemi() { + // If it is immediately after the first instance of the module keyword, + // then that introduces the GMF. + if (S == MaybeGMF) + S = GMFActive; + } + + /// Saw an 'export' identifier. + void handleExport() { + // The presence of an 'export' keyword always ends or excludes a GMF. + S = GMFAbsentOrEnded; + } + + /// Saw an 'import' identifier. + void handleImport(bool AfterTopLevelTokenSeq) { + // If we see this before any 'module' kw, then we have no GMF. + if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) + S = GMFAbsentOrEnded; + } + + /// Saw a 'module' identifier. + void handleModule(bool AfterTopLevelTokenSeq) { + // This was the first module identifier and not preceded by any token + // that would exclude a GMF. It could begin a GMF, but only if directly + // followed by a semicolon. + if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) + S = MaybeGMF; + else + S = GMFAbsentOrEnded; + } + + /// Saw any other token. + void handleMisc() { + // We saw something other than ; after the 'module' kw, so not a GMF. + if (S == MaybeGMF) + S = GMFAbsentOrEnded; + } + + bool inGMF() { return S == GMFActive; } + + private: + /// Track the transitions into and out of a Global Module Fragment, + /// if one is present. + GMFState S; + }; + + TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; + /// Whether the module import expects an identifier next. Otherwise, /// it expects a '.' or ';'. bool ModuleImportExpectsIdentifier = false; @@ -2414,6 +2476,7 @@ None, ModuleBegin, ModuleImport, + HeaderUnitImport, SkippedModuleImport, Failure, } Kind; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1983,6 +1983,10 @@ EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_module_begin, Action.ModuleForHeader); break; + case ImportAction::HeaderUnitImport: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit, + Action.ModuleForHeader); + break; case ImportAction::ModuleImport: EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_module_include, Action.ModuleForHeader); @@ -2191,6 +2195,17 @@ // known to have no effect beyond its effect on module visibility -- that is, // if it's got an include guard that is already defined, set to Import if it // is a modular header we've already built and should import. + + // For C++20 Modules + // [cpp.include]/7 If the header identified by the header-name denotes an + // importable header, it is implementation-defined whether the #include + // preprocessing directive is instead replaced by an import directive. + // For this implementation, the translation is permitted when we are parsing + // the Global Module Fragment, and not otherwise (the cases where it would be + // valid to replace an include with an import are highly constrained once in + // named module purview; this choice avoids considerable complexity in + // determining valid cases). + enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; if (PPOpts->SingleFileParseMode) @@ -2203,13 +2218,34 @@ alreadyIncluded(*File)) Action = IncludeLimitReached; + bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule && + !isForModuleBuilding(SuggestedModule.getModule(), + getLangOpts().CurrentModule, + getLangOpts().ModuleName); + + // FIXME: We do not have a good way to disambiguate C++ clang modules from + // C++ standard modules (other than use/non-use of Header Units). + Module *SM = SuggestedModule.getModule(); + // Maybe a usable Header Unit + bool UsableHeaderUnit = false; + if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) { + if (TrackGMFState.inGMF() || IsImportDecl) + UsableHeaderUnit = true; + else if (!IsImportDecl) { + // This is a Header Unit that we do not include-translate + SuggestedModule = ModuleMap::KnownHeader(); + SM = nullptr; + } + } + // Maybe a usable clang header module. + bool UsableHeaderModule = + (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM && + !SM->isHeaderUnit(); + // Determine whether we should try to import the module for this #include, if // there is one. Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). - if (Action == Enter && File && SuggestedModule && getLangOpts().Modules && - !isForModuleBuilding(SuggestedModule.getModule(), - getLangOpts().CurrentModule, - getLangOpts().ModuleName)) { + if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) { // If this include corresponds to a module but that module is // unavailable, diagnose the situation and bail out. // FIXME: Remove this; loadModule does the same check (but produces @@ -2226,7 +2262,7 @@ // FIXME: Should we have a second loadModule() overload to avoid this // extra lookup step? SmallVector, 2> Path; - for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent) + for (Module *Mod = SM; Mod; Mod = Mod->Parent) Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), FilenameTok.getLocation())); std::reverse(Path.begin(), Path.end()); @@ -2293,9 +2329,12 @@ // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. if (Action == Enter && File && - !HeaderInfo.ShouldEnterIncludeFile( - *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules, - SuggestedModule.getModule(), IsFirstIncludeOfFile)) { + !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(), + EnterOnce, getLangOpts().Modules, SM, + IsFirstIncludeOfFile)) { + // standard modules: + // If we are not in the GMF, then we textually include only + // clang modules: // Even if we've already preprocessed this header once and know that we // don't need to see its contents again, we still need to import it if it's // modular because we might not have imported it from this submodule before. @@ -2303,7 +2342,10 @@ // FIXME: We don't do this when compiling a PCH because the AST // serialization layer can't cope with it. This means we get local // submodule visibility semantics wrong in that case. - Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; + if (UsableHeaderUnit && !getLangOpts().CompilingPCH) + Action = TrackGMFState.inGMF() ? Import : Skip; + else + Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; } // Check for circular inclusion of the main file. @@ -2440,8 +2482,8 @@ switch (Action) { case Skip: // If we don't need to enter the file, stop now. - if (Module *M = SuggestedModule.getModule()) - return {ImportAction::SkippedModuleImport, M}; + if (SM) + return {ImportAction::SkippedModuleImport, SM}; return {ImportAction::None}; case IncludeLimitReached: @@ -2451,16 +2493,15 @@ case Import: { // If this is a module import, make it visible if needed. - Module *M = SuggestedModule.getModule(); - assert(M && "no module to import"); + assert(SM && "no module to import"); - makeModuleVisible(M, EndLoc); + makeModuleVisible(SM, EndLoc); if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp___include_macros) return {ImportAction::None}; - return {ImportAction::ModuleImport, M}; + return {ImportAction::ModuleImport, SM}; } case Enter: @@ -2492,13 +2533,14 @@ return {ImportAction::None}; // Determine if we're switching to building a new submodule, and which one. - if (auto *M = SuggestedModule.getModule()) { - if (M->getTopLevelModule()->ShadowingModule) { + // This does not apply for C++20 modules header units. + if (SM && !SM->isHeaderUnit()) { + if (SM->getTopLevelModule()->ShadowingModule) { // We are building a submodule that belongs to a shadowed module. This // means we find header files in the shadowed module. - Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule) - << M->getFullModuleName(); - Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc, + Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule) + << SM->getFullModuleName(); + Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc, diag::note_previous_definition); return {ImportAction::None}; } @@ -2511,22 +2553,22 @@ // that PCH, which means we should enter the submodule. We need to teach // the AST serialization layer to deal with the resulting AST. if (getLangOpts().CompilingPCH && - isForModuleBuilding(M, getLangOpts().CurrentModule, + isForModuleBuilding(SM, getLangOpts().CurrentModule, getLangOpts().ModuleName)) return {ImportAction::None}; assert(!CurLexerSubmodule && "should not have marked this as a module yet"); - CurLexerSubmodule = M; + CurLexerSubmodule = SM; // Let the macro handling code know that any future macros are within // the new submodule. - EnterSubmodule(M, EndLoc, /*ForPragma*/false); + EnterSubmodule(SM, EndLoc, /*ForPragma*/ false); // Let the parser know that any future declarations are within the new // submodule. // FIXME: There's no point doing this if we're handling a #__include_macros // directive. - return {ImportAction::ModuleBegin, M}; + return {ImportAction::ModuleBegin, SM}; } assert(!IsImportDecl && "failed to diagnose missing module for import decl"); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -941,6 +941,9 @@ // Update ImportSeqState to track our position within a C++20 import-seq // if this token is being produced as a result of phase 4 of translation. + // Update TrackGMFState to decide if we are currently in a Global Module + // Fragment. GMF state updates should precede ImportSeq ones, since GMF state + // depends on the prevailing ImportSeq state in two cases. if (getLangOpts().CPlusPlusModules && LexLevel == 1 && !Result.getFlag(Token::IsReinjected)) { switch (Result.getKind()) { @@ -953,7 +956,11 @@ case tok::r_brace: ImportSeqState.handleCloseBrace(); break; + // This token is injected to represent the translation of '#include "a.h"' + // into "import a.h;". Mimic the notional ';'. + case tok::annot_module_include: case tok::semi: + TrackGMFState.handleSemi(); ImportSeqState.handleSemi(); break; case tok::header_name: @@ -961,10 +968,12 @@ ImportSeqState.handleHeaderName(); break; case tok::kw_export: + TrackGMFState.handleExport(); ImportSeqState.handleExport(); break; case tok::identifier: if (Result.getIdentifierInfo()->isModulesImport()) { + TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq()); ImportSeqState.handleImport(); if (ImportSeqState.afterImportSeq()) { ModuleImportLoc = Result.getLocation(); @@ -973,9 +982,13 @@ CurLexerKind = CLK_LexAfterModuleImport; } break; + } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { + TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq()); + break; } LLVM_FALLTHROUGH; default: + TrackGMFState.handleMisc(); ImportSeqState.handleMisc(); break; } @@ -1222,6 +1235,7 @@ LLVM_FALLTHROUGH; case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: case ImportAction::SkippedModuleImport: // We chose to import (or textually enter) the file. Convert the // header-name token into a header unit annotation token. diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -663,12 +663,23 @@ return false; } - case tok::annot_module_include: - Actions.ActOnModuleInclude(Tok.getLocation(), - reinterpret_cast( - Tok.getAnnotationValue())); + case tok::annot_module_include: { + auto Loc = Tok.getLocation(); + Module *Mod = reinterpret_cast(Tok.getAnnotationValue()); + // FIXME: We need a better way to disambiguate C++ clang modules and + // standard C++ modules. + if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit() || + getLangOpts().ModulesTS) + Actions.ActOnModuleInclude(Loc, Mod); + else { + DeclResult Import = + Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod); + Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get(); + Result = Actions.ConvertDeclToDeclGroup(ImportDecl); + } ConsumeAnnotationToken(); return false; + } case tok::annot_module_begin: Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast( diff --git a/clang/test/Modules/cxx20-include-translation.cpp b/clang/test/Modules/cxx20-include-translation.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Modules/cxx20-include-translation.cpp @@ -0,0 +1,109 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 -xc++-user-header h1.h -emit-header-unit -o h1.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-user-header h2.h -emit-header-unit -o h2.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-user-header h3.h -emit-header-unit -o h3.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-user-header h4.h -emit-header-unit -o h4.pcm + +// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface -o Xlate.pcm \ +// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \ +// RUN: -fmodule-file=h4.pcm -fsyntax-only -Wauto-import -verify + +// Check that we do the intended translation and not more. +// RUN: %clang_cc1 -std=c++20 Xlate.cpp \ +// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \ +// RUN: -fmodule-file=h4.pcm -E -undef | FileCheck %s + +// We expect no diagnostics here, the used functions should all be available. +// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface \ +// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \ +// RUN: -fmodule-file=h4.pcm -fsyntax-only + +// The content of the headers is not terribly important, we just want to check +// whether they are textually included or include-translated. +//--- h1.h +#ifndef H1_GUARD +#define H1_GUARD + +#define ONE 1 + +void foo(); + +#endif // H1_GUARD + +//--- h2.h +#ifndef H2_GUARD +#define H2_GUARD + +#define TWO 2 + +void bar(); + +#endif // H2_GUARD + +//--- h3.h +#ifndef H3_GUARD +#define H3_GUARD + +#define THREE 3 + +void baz(); + +#endif // H3_GUARD + +//--- h4.h +#ifndef H4_GUARD +#define H4_GUARD + +#define FOUR 4 + +void boo(); + +#endif // H4_GUARD + +//--- h5.h +#ifndef H5_GUARD +#define H5_GUARD + +#define FIVE 5 + +void five(); + +#endif // H4_GUARD + +//--- Xlate.cpp +/* some comment ... + ... */ +module /*nothing here*/; + +// This should be include-translated, when the header unit for h1 is available. +#include "h1.h" // expected-warning {{treating #include as an import of module './h1.h'}} +// Import of a header unit is allowed, named modules are not. +import "h2.h"; +// A regular, untranslated, header +#include "h5.h" + +export module Xlate; + +// This is OK, the import immediately follows the module decl. +import "h3.h"; + +// This should *not* be include-translated, even if header unit for h4 is +// available. +#include "h4.h" + +export void charlie() { + foo(); + bar(); + baz(); + boo(); + five(); +} + +// CHECK: #pragma clang module import "./h1.h" +// CHECK: import ./h2.h +// CHECK: import ./h3.h +// CHECK-NOT: #pragma clang module import "./h4.h"