Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -318,6 +318,9 @@
   /// The import path for named module that we're currently processing.
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
 
+  /// Whether the current import is an `@import` (as opposed to a C++ import).
+  bool IsAtImport = false;
+
   /// Whether the last token we lexed was an '@'.
   bool LastTokenWasAt = false;
 
@@ -461,6 +464,163 @@
 
   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
 
+  /// Track the status of the c++20 module decl.
+  ///
+  ///   module-declaration:
+  ///     'export'[opt] 'module' module-name module-partition[opt]
+  ///     attribute-specifier-seq[opt] ';'
+  ///
+  ///   module-name:
+  ///     module-name-qualifier[opt] identifier
+  ///
+  ///   module-partition:
+  ///     ':' module-name-qualifier[opt] identifier
+  ///
+  ///   module-name-qualifier:
+  ///     identifier '.'
+  ///     module-name-qualifier identifier '.'
+  ///
+  /// Transition state:
+  ///
+  ///   NotAModuleDecl --- export ---> FoundExport
+  ///   NotAModuleDecl --- module ---> ImplementationCandidate
+  ///   FoundExport --- module ---> InterfaceCandidate
+  ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
+  ///   ImplementationCandidate --- period ---> ImplementationCandidate
+  ///   ImplementationCandidate --- colon ---> ImplementationCandidate
+  ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
+  ///   InterfaceCandidate --- period ---> InterfaceCandidate
+  ///   InterfaceCandidate --- colon ---> InterfaceCandidate
+  ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
+  ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
+  ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
+  ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
+  ///
+  /// A Semi only commits a candidate whose name is non-empty, so `module;`
+  /// does not count as a module declaration. Every (state, token) pair not
+  /// listed above resets the state to NotAModuleDecl.
+  ///
+  /// FIXME: attribute-specifier-seq is not handled here. That should not
+  /// matter for now since we don't support any module attributes yet.
+  class ModuleDeclSeq {
+    enum ModuleDeclState : int {
+      NotAModuleDecl,
+      FoundExport,
+      InterfaceCandidate,
+      ImplementationCandidate,
+      NamedModuleInterface,
+      NamedModuleImplementation,
+    };
+
+  public:
+    ModuleDeclSeq() : State(NotAModuleDecl) {}
+
+    /// Saw 'export', which may introduce a module interface declaration.
+    void handleExport() {
+      if (State == NotAModuleDecl)
+        State = FoundExport;
+      else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw 'module': an interface candidate if it directly follows 'export',
+    /// an implementation candidate if it starts the sequence.
+    void handleModule() {
+      if (State == FoundExport)
+        State = InterfaceCandidate;
+      else if (State == NotAModuleDecl)
+        State = ImplementationCandidate;
+      else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw an identifier: append it to the name of a candidate declaration.
+    void handleIdentifier(IdentifierInfo *Identifier) {
+      if (isModuleCandidate() && Identifier)
+        Name += Identifier->getName().str();
+      else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw ':': append the partition separator to a candidate's name.
+    void handleColon() {
+      if (isModuleCandidate())
+        Name += ":";
+      else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw '.': append the qualifier separator to a candidate's name.
+    void handlePeriod() {
+      if (isModuleCandidate())
+        Name += ".";
+      else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw ';': commit a candidate that has collected a non-empty name.
+    void handleSemi() {
+      if (!Name.empty() && isModuleCandidate()) {
+        if (State == InterfaceCandidate)
+          State = NamedModuleInterface;
+        else if (State == ImplementationCandidate)
+          State = NamedModuleImplementation;
+        else
+          llvm_unreachable("Unexpected ModuleDeclState.");
+      } else if (!isNamedModule())
+        reset();
+    }
+
+    /// Saw any other token: abandon whatever is not yet committed.
+    void handleMisc() {
+      if (!isNamedModule())
+        reset();
+    }
+
+    /// True while a module declaration is being collected.
+    bool isModuleCandidate() const {
+      return State == InterfaceCandidate || State == ImplementationCandidate;
+    }
+
+    /// True once a module declaration has been committed.
+    bool isNamedModule() const {
+      return State == NamedModuleInterface ||
+             State == NamedModuleImplementation;
+    }
+
+    /// True if the committed declaration started with 'export'.
+    bool isNamedInterface() const { return State == NamedModuleInterface; }
+
+    /// True for a committed non-exported declaration without a partition.
+    bool isImplementationUnit() const {
+      return State == NamedModuleImplementation && !getName().contains(':');
+    }
+
+    /// The name as collected, including any ':partition' suffix.
+    StringRef getName() const {
+      assert(isNamedModule() && "Can't get name from a non named module");
+      return Name;
+    }
+
+    /// The part of the collected name that precedes the first ':'.
+    StringRef getPrimaryName() const {
+      assert(isNamedModule() && "Can't get name from a non named module");
+      return getName().split(':').first;
+    }
+
+    /// Drop the collected name and return to NotAModuleDecl.
+    void reset() {
+      Name.clear();
+      State = NotAModuleDecl;
+    }
+
+  private:
+    ModuleDeclState State;
+    std::string Name;
+  };
+
+  ModuleDeclSeq ModuleDeclState;
+
   /// Whether the module import expects an identifier next. Otherwise,
   /// it expects a '.' or ';'.
   bool ModuleImportExpectsIdentifier = false;
@@ -2227,6 +2387,36 @@
   /// Retrieves the module whose implementation we're current compiling, if any.
   Module *getCurrentModuleImplementation();
 
+  /// If we are preprocessing a named module.
+  bool isNamedModule() const { return ModuleDeclState.isNamedModule(); }
+
+  /// If we are preprocessing a named interface unit.
+  /// Note that a module implementation partition is not considered as a
+  /// named interface unit here although it is importable
+  /// to ease the parsing.
+  bool isNamedInterfaceUnit() const {
+    return ModuleDeclState.isNamedInterface();
+  }
+
+  /// Get the named module name we're preprocessing.
+  /// Requires we're preprocessing a named module.
+  StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
+
+  /// If we are implementing an implementation module unit.
+  /// Note that the module implementation partition is not considered as an
+  /// implementation unit.
+  bool isImplementationUnit() const {
+    return ModuleDeclState.isImplementationUnit();
+  }
+
+  /// If we're importing a standard C++20 named module.
+  bool isImportingCXXNamedModules() const {
+    // NamedModuleImportPath will be non-empty only if we're importing
+    // Standard C++ named modules.
+    return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
+           !IsAtImport;
+  }
+
   /// Allocate a new MacroInfo object with the provided SourceLocation.
MacroInfo *AllocateMacroInfo(SourceLocation L); Index: clang/lib/Lex/Preprocessor.cpp =================================================================== --- clang/lib/Lex/Preprocessor.cpp +++ clang/lib/Lex/Preprocessor.cpp @@ -872,6 +872,7 @@ CurLexerKind != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); NamedModuleImportPath.clear(); + IsAtImport = true; ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } @@ -939,6 +940,7 @@ case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); + ModuleDeclState.handleSemi(); break; case tok::header_name: case tok::annot_header_unit: @@ -947,6 +949,13 @@ case tok::kw_export: TrackGMFState.handleExport(); StdCXXImportSeqState.handleExport(); + ModuleDeclState.handleExport(); + break; + case tok::colon: + ModuleDeclState.handleColon(); + break; + case tok::period: + ModuleDeclState.handlePeriod(); break; case tok::identifier: if (Result.getIdentifierInfo()->isModulesImport()) { @@ -955,18 +964,25 @@ if (StdCXXImportSeqState.afterImportSeq()) { ModuleImportLoc = Result.getLocation(); NamedModuleImportPath.clear(); + IsAtImport = false; ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } break; } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); break; + } else { + ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); + if (ModuleDeclState.isModuleCandidate()) + break; } [[fallthrough]]; default: TrackGMFState.handleMisc(); StdCXXImportSeqState.handleMisc(); + ModuleDeclState.handleMisc(); break; } } @@ -1150,6 +1166,15 @@ if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { if (LexHeaderName(Result)) return true; + + if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { + std::string Name = ModuleDeclState.getPrimaryName().str(); + Name += ":"; + NamedModuleImportPath.push_back( 
+ {getIdentifierInfo(Name), Result.getLocation()}); + CurLexerKind = CLK_LexAfterModuleImport; + return true; + } } else { Lex(Result); } @@ -1163,9 +1188,10 @@ /*DisableMacroExpansion*/ true, /*IsReinject*/ false); }; + bool ImportingHeader = Result.is(tok::header_name); // Check for a header-name. SmallVector Suffix; - if (Result.is(tok::header_name)) { + if (ImportingHeader) { // Enter the header-name token into the token stream; a Lex action cannot // both return a token and cache tokens (doing so would corrupt the token // cache if the call to Lex comes from CachingLex / PeekAhead). @@ -1243,8 +1269,8 @@ if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { // We expected to see an identifier here, and we did; continue handling // identifiers. - NamedModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), - Result.getLocation())); + NamedModuleImportPath.push_back( + std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); ModuleImportExpectsIdentifier = false; CurLexerKind = CLK_LexAfterModuleImport; return true; @@ -1284,7 +1310,8 @@ std::string FlatModuleName; if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { for (auto &Piece : NamedModuleImportPath) { - if (!FlatModuleName.empty()) + // If the FlatModuleName ends with colon, it implies it is a partition. + if (!FlatModuleName.empty() && FlatModuleName.back() != ':') FlatModuleName += "."; FlatModuleName += Piece.first->getName(); } @@ -1295,7 +1322,8 @@ } Module *Imported = nullptr; - if (getLangOpts().Modules) { + // We don't/shouldn't load the standard c++20 modules when preprocessing. 
+  if (getLangOpts().Modules && !isImportingCXXNamedModules()) {
     Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                           NamedModuleImportPath,
                                           Module::Hidden,
@@ -1303,6 +1331,7 @@
     if (Imported)
       makeModuleVisible(Imported, SemiLoc);
   }
+
   if (Callbacks)
     Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
 
Index: clang/unittests/Lex/CMakeLists.txt
===================================================================
--- clang/unittests/Lex/CMakeLists.txt
+++ clang/unittests/Lex/CMakeLists.txt
@@ -7,6 +7,7 @@
   HeaderMapTest.cpp
   HeaderSearchTest.cpp
   LexerTest.cpp
+  ModuleDeclStateTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
   PPDependencyDirectivesTest.cpp
@@ -17,6 +18,7 @@
   PRIVATE
   clangAST
   clangBasic
+  clangFrontend
   clangLex
   clangParse
   clangSema
Index: clang/unittests/Lex/ModuleDeclStateTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Lex/ModuleDeclStateTest.cpp
@@ -0,0 +1,365 @@
+//===- unittests/Lex/ModuleDeclStateTest.cpp - ModuleDeclState tests --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+#include <cstddef>
+#include <initializer_list>
+#include <vector>
+
+using namespace clang;
+
+namespace {
+
+/// PPCallbacks implementation that, on each `moduleImport` callback, checks
+/// Preprocessor::isImportingCXXNamedModules() against the next expected value
+/// and counts how many callbacks were received.
+class CheckNamedModuleImportingCB : public PPCallbacks {
+  Preprocessor &PP;
+  std::vector<bool> IsImportingNamedModulesAssertions;
+  std::size_t NextCheckingIndex;
+
+public:
+  CheckNamedModuleImportingCB(Preprocessor &PP,
+                              std::initializer_list<bool> lists)
+      : PP(PP), IsImportingNamedModulesAssertions(lists), NextCheckingIndex(0) {
+  }
+
+  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
+                    const Module *Imported) override {
+    ASSERT_TRUE(NextCheckingIndex < IsImportingNamedModulesAssertions.size());
+    EXPECT_EQ(PP.isImportingCXXNamedModules(),
+              IsImportingNamedModulesAssertions[NextCheckingIndex]);
+    NextCheckingIndex++;
+
+    ASSERT_EQ(Imported, nullptr);
+  }
+
+  // Number of `moduleImport` callbacks seen so far. Currently, only named
+  // module imports are expected to be handled by that callback.
+  std::size_t importNamedModuleNum() { return NextCheckingIndex; }
+};
+
+/// Fixture that builds a Preprocessor over an in-memory buffer, with language
+/// options derived from command-line arguments, and lexes it to EOF.
+class ModuleDeclStateTest : public ::testing::Test {
+protected:
+  ModuleDeclStateTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions), Invocation() {
+    TargetOpts->Triple = "x86_64-unknown-linux-gnu";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  /// Populates Invocation from the arguments and returns its LangOptions.
+  LangOptions &getLangOpts(ArrayRef<const char *> CommandLineArgs) {
+    CompilerInvocation::CreateFromArgs(Invocation, CommandLineArgs, Diags);
+    return *Invocation.getLangOpts();
+  }
+
+  /// Creates a Preprocessor whose main file is \p source.
+  std::unique_ptr<Preprocessor>
+  getPreprocessor(const char *source, ArrayRef<const char *> CommandLineArgs) {
+    std::unique_ptr<llvm::MemoryBuffer> Buf =
+        llvm::MemoryBuffer::getMemBuffer(source);
+    SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+    LangOptions &LangOpts = getLangOpts(CommandLineArgs);
+    HeaderInfo.emplace(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                       Diags, LangOpts, Target.get());
+
+    return std::make_unique<Preprocessor>(
+        std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
+        *HeaderInfo, ModLoader,
+        /*IILookup =*/nullptr,
+        /*OwnsHeaderSearch =*/false);
+  }
+
+  /// Registers \p C on \p PP and lexes the main file until EOF.
+  void preprocess(Preprocessor &PP, std::unique_ptr<PPCallbacks> C) {
+    PP.Initialize(*Target);
+    PP.addPPCallbacks(std::move(C));
+    PP.EnterMainSourceFile();
+
+    while (true) {
+      Token tok;
+      PP.Lex(tok);
+      if (tok.is(tok::eof))
+        break;
+    }
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+  CompilerInvocation Invocation;
+  TrivialModuleLoader ModLoader;
+  llvm::Optional<HeaderSearch> HeaderInfo;
+};
+
+TEST_F(ModuleDeclStateTest, NamedModuleInterface) {
+  const char *source = R"(
+export module foo;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_TRUE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo");
+}
+
+TEST_F(ModuleDeclStateTest, NamedModuleImplementation) {
+  const char *source = R"(
+module foo;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_TRUE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo");
+}
+
+TEST_F(ModuleDeclStateTest, ModuleImplementationPartition) {
+  const char *source = R"(
+module foo:part;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo:part");
+}
+
+TEST_F(ModuleDeclStateTest, ModuleInterfacePartition) {
+  const char *source = R"(
+export module foo:part;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_TRUE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo:part");
+}
+
+TEST_F(ModuleDeclStateTest, ModuleNameWithDot) {
+  const char *source = R"(
+export module foo.dot:part.dot;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_TRUE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo.dot:part.dot");
+}
+
+TEST_F(ModuleDeclStateTest, NotModule) {
+  const char *source = R"(
+// export module foo:part;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 0u);
+  EXPECT_FALSE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+}
+
+TEST_F(ModuleDeclStateTest, ModuleWithGMF) {
+  const char *source = R"(
+module;
+#include "bar.h"
+#include <zoo.h>
+import "bar";
+import <zoo.h>;
+export module foo:part;
+import "HU";
+import M;
+import :another;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  // Two callbacks are expected, presumably for `import M;` and
+  // `import :another;` (header-name imports are not counted).
+  std::initializer_list<bool> ImportKinds = {true, true};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 2u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_TRUE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo:part");
+}
+
+TEST_F(ModuleDeclStateTest, ModuleWithGMFWithClangNamedModule) {
+  const char *source = R"(
+module;
+#include "bar.h"
+#include <zoo.h>
+import "bar";
+import <zoo.h>;
+export module foo:part;
+import "HU";
+import M;
+import :another;
+  )";
+  // NOTE(review): this test is identical to ModuleWithGMF, command line
+  // included; presumably it was meant to pass Clang-modules flags such as
+  // -fmodules. Confirm the intended arguments.
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {true, true};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 2u);
+  EXPECT_TRUE(PP->isNamedModule());
+  EXPECT_TRUE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+  EXPECT_EQ(PP->getNamedModuleName(), "foo:part");
+}
+
+TEST_F(ModuleDeclStateTest, ImportsInNormalTU) {
+  const char *source = R"(
+#include "bar.h"
+#include <zoo.h>
+import "bar";
+import <zoo.h>;
+import "HU";
+import M;
+// We can't import a partition in non-module TU.
+import :another;
+  )";
+  std::unique_ptr<Preprocessor> PP = getPreprocessor(source, "-std=c++20");
+
+  std::initializer_list<bool> ImportKinds = {true};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 1u);
+  EXPECT_FALSE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+}
+
+TEST_F(ModuleDeclStateTest, ImportAClangNamedModule) {
+  const char *source = R"(
+@import anything;
+  )";
+  std::unique_ptr<Preprocessor> PP =
+      getPreprocessor(source, {"-fmodules", "-fimplicit-module-maps", "-x",
+                               "objective-c++", "-std=c++20"});
+
+  std::initializer_list<bool> ImportKinds = {false};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 1u);
+  EXPECT_FALSE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+}
+
+TEST_F(ModuleDeclStateTest, ImportMixedForm) {
+  const char *source = R"(
+import "HU";
+@import anything;
+import M;
+@import another;
+import M2;
+  )";
+  std::unique_ptr<Preprocessor> PP =
+      getPreprocessor(source, {"-fmodules", "-fimplicit-module-maps", "-x",
+                               "objective-c++", "-std=c++20"});
+
+  // Expected in source order: each `@import` reports false, `import M;` and
+  // `import M2;` report true; `import "HU";` is not counted.
+  std::initializer_list<bool> ImportKinds = {false, true, false, true};
+  preprocess(*PP,
+             std::make_unique<CheckNamedModuleImportingCB>(*PP, ImportKinds));
+
+  auto *Callback =
+      static_cast<CheckNamedModuleImportingCB *>(PP->getPPCallbacks());
+  EXPECT_EQ(Callback->importNamedModuleNum(), 4u);
+  EXPECT_FALSE(PP->isNamedModule());
+  EXPECT_FALSE(PP->isNamedInterfaceUnit());
+  EXPECT_FALSE(PP->isImplementationUnit());
+}
+
+} // namespace