diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h --- a/clang/include/clang/Lex/DependencyDirectivesScanner.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -19,15 +19,41 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" namespace clang { +namespace tok { +enum TokenKind : unsigned short; +} + class DiagnosticsEngine; namespace dependency_directives_scan { +/// Token lexed as part of dependency directive scanning. +struct Token { + /// Offset into the original source input. + unsigned Offset; + unsigned Length; + tok::TokenKind Kind; + unsigned short Flags; + + Token(unsigned Offset, unsigned Length, tok::TokenKind Kind, + unsigned short Flags) + : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {} + + unsigned getEnd() const { return Offset + Length; } + + bool is(tok::TokenKind K) const { return Kind == K; } + bool isNot(tok::TokenKind K) const { return Kind != K; } + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + return is(K1) || is(K2); + } + template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const { + return is(K1) || isOneOf(Ks...); + } +}; + /// Represents the kind of preprocessor directive or a module declaration that /// is tracked by the scanner in its token output. enum DirectiveKind : uint8_t { @@ -52,9 +78,10 @@ pp_else, pp_endif, decl_at_import, - cxx_export_decl, cxx_module_decl, cxx_import_decl, + cxx_export_module_decl, + cxx_export_import_decl, pp_eof, }; @@ -62,53 +89,48 @@ /// scanning. It's used to track various preprocessor directives that could /// potentially have an effect on the depedencies. struct Directive { + ArrayRef<Token> Tokens; + /// The kind of token. DirectiveKind Kind = pp_none; - /// Offset into the output byte stream of where the directive begins. 
- int Offset = -1; - - Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {} -}; - -/// Simplified token range to track the range of a potentially skippable PP -/// directive. -struct SkippedRange { - /// Offset into the output byte stream of where the skipped directive begins. - int Offset; - - /// The number of bytes that can be skipped before the preprocessing must - /// resume. - int Length; + Directive() = default; + Directive(DirectiveKind K, ArrayRef<Token> Tokens) + : Tokens(Tokens), Kind(K) {} }; -/// Computes the potential source ranges that can be skipped by the preprocessor -/// when skipping a directive like #if, #ifdef or #elsif. -/// -/// \returns false on success, true on error. -bool computeSkippedRanges(ArrayRef<Directive> Input, - llvm::SmallVectorImpl<SkippedRange> &Range); - } // end namespace dependency_directives_scan -/// Minimize the input down to the preprocessor directives that might have +/// Scan the input for the preprocessor directives that might have /// an effect on the dependencies for a compilation unit. /// -/// This function deletes all non-preprocessor code, and strips anything that -/// can't affect what gets included. It canonicalizes whitespace where -/// convenient to stabilize the output against formatting changes in the input. -/// -/// Clears the output vectors at the beginning of the call. +/// This function ignores all non-preprocessor code and anything that +/// can't affect what gets included. /// /// \returns false on success, true on error. If the diagnostic engine is not /// null, an appropriate error is reported using the given input location -/// with the offset that corresponds to the minimizer's current buffer offset. +/// with the offset that corresponds to the \p Input buffer offset. 
bool scanSourceForDependencyDirectives( - llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output, - llvm::SmallVectorImpl<dependency_directives_scan::Directive> &Directives, + StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, + SmallVectorImpl<dependency_directives_scan::Directive> &Directives, DiagnosticsEngine *Diags = nullptr, SourceLocation InputSourceLoc = SourceLocation()); +/// Print the previously scanned dependency directives as minimized source text. +/// +/// \param Source The original source text that the dependency directives were +/// scanned from. +/// \param Directives The previously scanned dependency +/// directives. +/// \param OS the stream to print the dependency directives on. +/// +/// This is used primarily for testing purposes, during dependency scanning the +/// \p Lexer uses the tokens directly, not their printed version. +void printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef<dependency_directives_scan::Directive> Directives, + llvm::raw_ostream &OS); + } // end namespace clang #endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -16,6 +16,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/PreprocessorLexer.h" #include "clang/Lex/Token.h" #include "llvm/ADT/Optional.h" @@ -149,6 +150,13 @@ // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState; + /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer(). + ArrayRef<dependency_directives_scan::Directive> DepDirectives; + + /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the + /// next token to use from the current dependency directive. + unsigned NextDepDirectiveTokenIndex = 0; + void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); public: @@ -195,6 +203,23 @@ /// return the tok::eof token. 
This implicitly involves the preprocessor. bool Lex(Token &Result); + /// Called when the preprocessor is in 'dependency scanning lexing mode'. + bool LexDependencyDirectiveToken(Token &Result); + + /// Called when the preprocessor is in 'dependency scanning lexing mode' and + /// is skipping a conditional block. + bool LexDependencyDirectiveTokenWhileSkipping(Token &Result); + + /// True when the preprocessor is in 'dependency scanning lexing mode' and + /// created this \p Lexer for lexing a set of dependency directive tokens. + bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); } + + /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to + /// the position just after the token. + /// \returns the buffer pointer at the beginning of the token. + const char *convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result); + public: /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. bool isPragmaLexer() const { return Is_PragmaLexer; } @@ -288,14 +313,8 @@ return BufferPtr - BufferStart; } - /// Skip over \p NumBytes bytes. - /// - /// If the skip is successful, the next token will be lexed from the new - /// offset. The lexer also assumes that we skipped to the start of the line. - /// - /// \returns true if the skip failed (new offset would have been past the - /// end of the buffer), false otherwise. - bool skipOver(unsigned NumBytes); + /// Set the lexer's buffer pointer to \p Offset. + void seek(unsigned Offset, bool IsAtStartOfLine); /// Stringify - Convert the specified string into a C string by i) escaping /// '\\' and " characters and ii) replacing newline character(s) with "\\n". 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,7 +29,6 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -558,6 +557,7 @@ CLK_Lexer, CLK_TokenLexer, CLK_CachingLexer, + CLK_DependencyDirectivesLexer, CLK_LexAfterModuleImport } CurLexerKind = CLK_Lexer; @@ -2595,14 +2595,6 @@ void emitMacroDeprecationWarning(const Token &Identifier) const; void emitRestrictExpansionWarning(const Token &Identifier) const; void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; - - Optional - getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); - - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings; }; /// Abstract base class that describes a handler that will receive diff --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h deleted file mode 100644 --- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H -#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace clang { - -/// A mapping from an offset into a buffer to the number of bytes that can be -/// skipped by the preprocessor when skipping over excluded conditional -/// directive ranges. -using PreprocessorSkippedRangeMapping = llvm::DenseMap; - -/// The datastructure that holds the mapping between the active memory buffers -/// and the individual skip mappings. -using ExcludedPreprocessorDirectiveSkipMapping = - llvm::DenseMap; - -} // end namespace clang - -#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -10,8 +10,9 @@ #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_ #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/FileEntry.h" #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include @@ -200,13 +201,18 @@ /// build it again. std::shared_ptr FailedModules; - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. + /// Function for getting the dependency preprocessor directives of a file. /// - /// The pointer is passed to the Preprocessor when it's constructed. The - /// pointer is unowned, the client is responsible for its lifetime. 
- ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings = nullptr; + /// These are directives derived from a special form of lexing where the + /// source input is scanned for the preprocessor directives that might have an + /// effect on the dependencies for a compilation unit. + /// + /// Enables a client to cache the directives for a file and provide them + /// across multiple compiler invocations. + /// FIXME: Allow returning an error. + std::function>( + FileEntryRef)> + DependencyDirectivesForFile; /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -10,7 +10,7 @@ #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" @@ -22,24 +22,26 @@ namespace tooling { namespace dependencies { -/// Original and minimized contents of a cached file entry. Single instance can +using DependencyDirectivesTy = + SmallVector; + +/// Contents and directive tokens of a cached file entry. Single instance can /// be shared between multiple entries. struct CachedFileContents { - CachedFileContents(std::unique_ptr Original) - : Original(std::move(Original)), MinimizedAccess(nullptr) {} + CachedFileContents(std::unique_ptr Contents) + : Original(std::move(Contents)), DepDirectives(nullptr) {} /// Owning storage for the original contents. 
 std::unique_ptr<llvm::MemoryBuffer> Original; /// The mutex that must be locked before mutating directive tokens. std::mutex ValueLock; - /// Owning storage for the minimized contents. - std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage; + SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; /// Accessor to the directive tokens that's atomic to avoid data races. - std::atomic<llvm::MemoryBuffer *> MinimizedAccess; - /// Skipped range mapping of the minimized contents. - /// This is initialized iff `MinimizedAccess != nullptr`. - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + /// \p CachedFileContents has ownership of the pointer. + std::atomic<const Optional<DependencyDirectivesTy> *> DepDirectives; + + ~CachedFileContents() { delete DepDirectives.load(); } }; /// An in-memory representation of a file system entity that is of interest to @@ -86,13 +88,17 @@ /// \returns The scanned preprocessor directive tokens of the file that are /// used to speed up preprocessing, if available. - StringRef getDirectiveTokens() const { + Optional<ArrayRef<dependency_directives_scan::Directive>> + getDirectiveTokens() const { assert(!isError() && "error"); - assert(!MaybeStat->isDirectory() && "not a file"); + assert(!isDirectory() && "not a file"); assert(Contents && "contents not initialized"); - llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); - assert(Buffer && "not minimized"); - return Buffer->getBuffer(); + if (auto *Directives = Contents->DepDirectives.load()) { + if (Directives->hasValue()) + return ArrayRef<dependency_directives_scan::Directive>( + Directives->getValue()); + } + return None; } /// \returns The error. @@ -111,15 +117,6 @@ return MaybeStat->getUniqueID(); } - /// \returns The mapping between location -> distance that is used to speed up - /// the block skipping in the preprocessor. - const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { - assert(!isError() && "error"); - assert(!isDirectory() && "not a file"); - assert(Contents && "contents not initialized"); - return Contents->PPSkippedRangeMapping; - } - /// \returns The data structure holding both contents and directive tokens. 
CachedFileContents *getCachedContents() const { assert(!isError() && "error"); @@ -237,10 +234,6 @@ /// If the underlying entry is an opened file, this wrapper returns the file /// contents and the scanned preprocessor directives. class EntryRef { - /// For entry that is an opened file, this bit signifies whether its contents - /// are minimized. - bool Minimized; - /// The filename used to access this entry. std::string Filename; @@ -248,8 +241,8 @@ const CachedFileSystemEntry &Entry; public: - EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry) - : Minimized(Minimized), Filename(Name), Entry(Entry) {} + EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) + : Filename(Name), Entry(Entry) {} llvm::vfs::Status getStatus() const { llvm::vfs::Status Stat = Entry.getStatus(); @@ -268,12 +261,11 @@ return *this; } - StringRef getContents() const { - return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents(); - } + StringRef getContents() const { return Entry.getOriginalContents(); } - const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { - return Minimized ? &Entry.getPPSkippedRangeMapping() : nullptr; + Optional> + getDirectiveTokens() const { + return Entry.getDirectiveTokens(); } }; @@ -290,24 +282,13 @@ public: DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, - IntrusiveRefCntPtr FS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), - PPSkipMappings(PPSkipMappings) {} + IntrusiveRefCntPtr FS) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} llvm::ErrorOr status(const Twine &Path) override; llvm::ErrorOr> openFileForRead(const Twine &Path) override; - /// Disable directives scanning of the given file. - void disableDirectivesScanning(StringRef Filename); - /// Enable directives scanning of all files. 
- void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); } - -private: - /// Check whether the file should be scanned for preprocessor directives. - bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID); - /// Returns entry for the given filename. /// /// Attempts to use the local and shared caches first, then falls back to @@ -316,6 +297,10 @@ getOrCreateFileSystemEntry(StringRef Filename, bool DisableDirectivesScanning = false); +private: + /// Check whether the file should be scanned for preprocessor directives. + bool shouldScanForDirectives(StringRef Filename); + /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the /// shared cache indexed by unique ID, or creates new entry from scratch. @@ -396,12 +381,6 @@ /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. DependencyScanningFilesystemLocalCache LocalCache; - /// The mapping structure which records information about the - /// excluded conditional directive skip mappings that are used by the - /// currently active preprocessor. - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; - /// The set of files that should not be scanned for PP directives. 
- llvm::DenseSet NotToBeScanned; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -13,7 +13,6 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "llvm/Support/Error.h" @@ -69,7 +68,6 @@ private: std::shared_ptr PCHContainerOps; - ExcludedPreprocessorDirectiveSkipMapping PPSkipMappings; /// The physical filesystem overlaid by `InMemoryFS`. llvm::IntrusiveRefCntPtr RealFS; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -1157,10 +1157,10 @@ SourceManager &SM = CI.getPreprocessor().getSourceManager(); llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID()); - llvm::SmallString<1024> Output; + llvm::SmallVector Tokens; llvm::SmallVector Directives; if (scanSourceForDependencyDirectives( - FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(), + FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) { assert(CI.getDiagnostics().hasErrorOccurred() && "no errors reported for failure"); @@ -1179,7 +1179,8 @@ } return; } - llvm::outs() << Output; + printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives, + llvm::outs()); } void GetDependenciesByModuleNameAction::ExecuteAction() { diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -18,92 +18,148 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/MemoryBuffer.h" -using namespace llvm; using namespace clang; using namespace clang::dependency_directives_scan; +using namespace llvm; namespace { -struct Scanner { - /// Minimized output. - SmallVectorImpl &Out; - /// The known tokens encountered during the minimization. - SmallVectorImpl &Directives; +struct DirectiveWithTokens { + DirectiveKind Kind; + unsigned NumTokens; - Scanner(SmallVectorImpl &Out, SmallVectorImpl &Directives, - StringRef Input, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) - : Out(Out), Directives(Directives), Input(Input), Diags(Diags), - InputSourceLoc(InputSourceLoc) {} + DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) + : Kind(Kind), NumTokens(NumTokens) {} +}; + +/// Does an efficient "scan" of the sources to detect the presence of +/// preprocessor (or module import) directives and collects the raw lexed tokens +/// for those directives so that the \p Lexer can "replay" them when the file is +/// included. +/// +/// Note that the behavior of the raw lexer is affected by the language mode, +/// while at this point we want to do a scan and collect tokens once, +/// irrespective of the language mode that the file will get included in. To +/// compensate for that the \p Lexer, while "replaying", will adjust a token +/// where appropriate, when it could affect the preprocessor's state. 
+/// For example in a directive like +/// +/// \code +/// #if __has_cpp_attribute(clang::fallthrough) +/// \endcode +/// +/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 +/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' +/// while in C++ mode. +struct Scanner { + Scanner(StringRef Input, + SmallVectorImpl &Tokens, + DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) + : Input(Input), Tokens(Tokens), Diags(Diags), + InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), + TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), + Input.end()) {} + + static LangOptions getLangOptsForDepScanning() { + LangOptions LangOpts; + // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. + LangOpts.ObjC = true; + LangOpts.LineComment = true; + return LangOpts; + } /// Lex the provided source and emit the directive tokens. /// /// \returns True on error. - bool scan(); + bool scan(SmallVectorImpl &Directives); private: - struct IdInfo { - const char *Last; - StringRef Name; - }; + /// Lexes next token and advances \p First and the \p Lexer. + LLVM_NODISCARD dependency_directives_scan::Token & + lexToken(const char *&First, const char *const End); + + dependency_directives_scan::Token &lexIncludeFilename(const char *&First, + const char *const End); - /// Lex an identifier. + /// Lexes next token and if it is identifier returns its string, otherwise + /// it skips the current line and returns \p None. /// - /// \pre First points at a valid identifier head. - LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); - LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, - const char *const End); + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. 
+ LLVM_NODISCARD Optional + tryLexIdentifierOrSkipLine(const char *&First, const char *const End); + + /// Used when it is certain that next token is an identifier. + LLVM_NODISCARD StringRef lexIdentifier(const char *&First, + const char *const End); + + /// Lexes next token and returns true iff it is an identifier that matches \p + /// Id, otherwise it skips the current line and returns false. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id, + const char *&First, + const char *const End); + LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First, + const char *const End); LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End); - Directive &pushDirective(DirectiveKind K) { - Directives.emplace_back(K, Out.size()); - return Directives.back(); + LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First, + const char *const End); + LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind, + const char *&First, + const char *const End); + void lexPPDirectiveBody(const char *&First, const char *const End); + + DirectiveWithTokens &pushDirective(DirectiveKind Kind) { + Tokens.append(CurDirToks); + DirsWithToks.emplace_back(Kind, CurDirToks.size()); + CurDirToks.clear(); + return DirsWithToks.back(); } void popDirective() { - 
Out.resize(Directives.back().Offset); - Directives.pop_back(); + Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); } DirectiveKind topDirective() const { - return Directives.empty() ? pp_none : Directives.back().Kind; + return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; } - Scanner &put(char Byte) { - Out.push_back(Byte); - return *this; - } - Scanner &append(StringRef S) { return append(S.begin(), S.end()); } - Scanner &append(const char *First, const char *Last) { - Out.append(First, Last); - return *this; + unsigned getOffsetAt(const char *CurPtr) const { + return CurPtr - Input.data(); } - void printToNewline(const char *&First, const char *const End); - void printAdjacentModuleNameParts(const char *&First, const char *const End); - LLVM_NODISCARD bool printAtImportBody(const char *&First, - const char *const End); - void printDirectiveBody(const char *&First, const char *const End); - void printAdjacentMacroArgs(const char *&First, const char *const End); - LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); - /// Reports a diagnostic if the diagnostic engine is provided. Always returns /// true at the end. bool reportError(const char *CurPtr, unsigned Err); StringMap SplitIds; StringRef Input; + SmallVectorImpl &Tokens; DiagnosticsEngine *Diags; SourceLocation InputSourceLoc; + + /// Keeps track of the tokens for the currently lexed directive. Once a + /// directive is fully lexed and "committed" then the tokens get appended to + /// \p Tokens and \p CurDirToks is cleared for the next directive. + SmallVector CurDirToks; + /// The directives that were lexed along with the number of tokens that each + /// directive contains. The tokens of all the directives are kept in \p Tokens + /// vector, in the same order as the directives order in \p DirsWithToks. 
+ SmallVector DirsWithToks; + LangOptions LangOpts; + Lexer TheLexer; }; } // end anonymous namespace @@ -112,7 +168,7 @@ if (!Diags) return true; assert(CurPtr >= Input.data() && "invalid buffer ptr"); - Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); return true; } @@ -265,30 +321,6 @@ } } -static const char *findLastNonSpace(const char *First, const char *Last) { - assert(First <= Last); - while (First != Last && isHorizontalWhitespace(Last[-1])) - --Last; - return Last; -} - -static const char *findLastNonSpaceNonBackslash(const char *First, - const char *Last) { - assert(First <= Last); - while (First != Last && - (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\')) - --Last; - return Last; -} - -static const char *findFirstTrailingSpace(const char *First, const char *Last) { - const char *LastNonSpace = findLastNonSpace(First, Last); - if (Last == LastNonSpace) - return Last; - assert(isHorizontalWhitespace(LastNonSpace[0])); - return LastNonSpace + 1; -} - static void skipLineComment(const char *&First, const char *const End) { assert(First[0] == '/' && First[1] == '/'); First += 2; @@ -396,67 +428,6 @@ skipLine(First, End); } -void Scanner::printToNewline(const char *&First, const char *const End) { - while (First != End && !isVerticalWhitespace(*First)) { - const char *Last = First; - do { - // Iterate over strings correctly to avoid comments and newlines. - if (*Last == '"' || *Last == '\'' || - (*Last == '<' && - (topDirective() == pp_include || topDirective() == pp_import))) { - if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) - skipRawString(Last, End); - else - skipString(Last, End); - continue; - } - if (*Last != '/' || End - Last < 2) { - ++Last; - continue; // Gather the rest up to print verbatim. - } - - if (Last[1] != '/' && Last[1] != '*') { - ++Last; - continue; - } - - // Deal with "//..." and "/*...*/". 
- append(First, findFirstTrailingSpace(First, Last)); - First = Last; - - if (Last[1] == '/') { - skipLineComment(First, End); - return; - } - - put(' '); - skipBlockComment(First, End); - skipOverSpaces(First, End); - Last = First; - } while (Last != End && !isVerticalWhitespace(*Last)); - - // Print out the string. - const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); - if (Last == End || LastBeforeTrailingSpace == First || - LastBeforeTrailingSpace[-1] != '\\') { - append(First, LastBeforeTrailingSpace); - First = Last; - skipNewline(First, End); - return; - } - - // Print up to the last character that's not a whitespace or backslash. - // Then print exactly one space, which matters when tokens are separated by - // a line continuation. - append(First, findLastNonSpaceNonBackslash(First, Last)); - put(' '); - - First = Last; - skipNewline(First, End); - skipOverSpaces(First, End); - } -} - static void skipWhitespace(const char *&First, const char *const End) { for (;;) { assert(First <= End); @@ -489,176 +460,134 @@ } } -void Scanner::printAdjacentModuleNameParts(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.')); - append(First, Last); - First = Last; -} - -bool Scanner::printAtImportBody(const char *&First, const char *const End) { +bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, + const char *const End) { + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; - - if (isVerticalWhitespace(*First)) { - skipNewline(First, End); - continue; - } - - // Found a semicolon. - if (*First == ';') { - put(*First++).put('\n'); - return false; - } - - // Don't handle macro expansions inside @import for now. 
- if (!isAsciiIdentifierContinue(*First) && *First != '.') - return true; - - printAdjacentModuleNameParts(First, End); + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eof)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_missing_semi_after_at_import); + if (Tok.is(tok::semi)) + break; } + pushDirective(Kind); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + return false; } -void Scanner::printDirectiveBody(const char *&First, const char *const End) { - skipWhitespace(First, End); // Skip initial whitespace. - printToNewline(First, End); - while (Out.back() == ' ') - Out.pop_back(); - put('\n'); -} +dependency_directives_scan::Token &Scanner::lexToken(const char *&First, + const char *const End) { + clang::Token Tok; + TheLexer.LexFromRawLexer(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); -LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, - const char *const End) { - assert(isAsciiIdentifierContinue(*First) && "invalid identifer"); - const char *Last = First + 1; - while (Last != End && isAsciiIdentifierContinue(*Last)) - ++Last; - return Last; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -LLVM_NODISCARD static const char * -getIdentifierContinuation(const char *First, const char *const End) { - if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) - return nullptr; +dependency_directives_scan::Token & +Scanner::lexIncludeFilename(const char *&First, const char *const End) { + clang::Token Tok; + TheLexer.LexIncludeFilename(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First 
<= End); - ++First; - skipNewline(First, End); - if (First == End) - return nullptr; - return isAsciiIdentifierContinue(First[0]) ? First : nullptr; -} - -Scanner::IdInfo Scanner::lexIdentifier(const char *First, - const char *const End) { - const char *Last = lexRawIdentifier(First, End); - const char *Next = getIdentifierContinuation(Last, End); - if (LLVM_LIKELY(!Next)) - return IdInfo{Last, StringRef(First, Last - First)}; - - // Slow path, where identifiers are split over lines. - SmallVector Id(First, Last); - while (Next) { - Last = lexRawIdentifier(Next, End); - Id.append(Next, Last); - Next = getIdentifierContinuation(Last, End); - } - return IdInfo{ - Last, - SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -void Scanner::printAdjacentMacroArgs(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && - (isAsciiIdentifierContinue(*Last) || *Last == '.' 
|| *Last == ',')); - append(First, Last); - First = Last; +void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { + while (true) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eod)) + break; + } } -bool Scanner::printMacroArgs(const char *&First, const char *const End) { - assert(*First == '('); - put(*First++); - for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; +LLVM_NODISCARD Optional +Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.isNot(tok::raw_identifier)) { + if (!Tok.is(tok::eod)) + skipLine(First, End); + return None; + } - if (*First == ')') { - put(*First++); - return false; - } + bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; + if (LLVM_LIKELY(!NeedsCleaning)) + return Input.slice(Tok.Offset, Tok.getEnd()); - // This is intentionally fairly liberal. - if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ',')) - return true; + SmallString<64> Spelling; + Spelling.resize(Tok.Length); - printAdjacentMacroArgs(First, End); + unsigned SpellingLength = 0; + const char *BufPtr = Input.begin() + Tok.Offset; + const char *AfterIdent = Input.begin() + Tok.getEnd(); + while (BufPtr < AfterIdent) { + unsigned Size; + Spelling[SpellingLength++] = + Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; } + + return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) + .first->first(); } -/// Looks for an identifier starting from Last. -/// -/// Updates "First" to just past the next identifier, if any. Returns true iff -/// the identifier matches "Id". 
-bool Scanner::isNextIdentifier(StringRef Id, const char *&First, - const char *const End) { - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) - return false; +StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { + Optional Id = tryLexIdentifierOrSkipLine(First, End); + assert(Id.hasValue() && "expected identifier token"); + return Id.getValue(); +} - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - return FoundId.Name == Id; +bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, + const char *const End) { + if (Optional FoundId = tryLexIdentifierOrSkipLine(First, End)) { + if (*FoundId == Id) + return true; + skipLine(First, End); + } + return false; } bool Scanner::lexAt(const char *&First, const char *const End) { // Handle "@import". - const char *ImportLoc = First++; - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); - return false; - } - pushDirective(decl_at_import); - append("@import "); - if (printAtImportBody(First, End)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); - skipWhitespace(First, End); - if (First == End) + + // Lex '@'. 
+ const dependency_directives_scan::Token &AtTok = lexToken(First, End); + assert(AtTok.is(tok::at)); + (void)AtTok; + + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); - return false; + return lexModuleDirectiveBody(decl_at_import, First, End); } bool Scanner::lexModule(const char *&First, const char *const End) { - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = lexIdentifier(First, End); bool Export = false; - if (Id.Name == "export") { + if (Id == "export") { Export = true; - skipWhitespace(First, End); - if (!isAsciiIdentifierContinue(*First)) { - skipLine(First, End); + Optional NextId = tryLexIdentifierOrSkipLine(First, End); + if (!NextId) return false; - } - Id = lexIdentifier(First, End); - First = Id.Last; + Id = *NextId; } - if (Id.Name != "module" && Id.Name != "import") { + if (Id != "module" && Id != "import") { skipLine(First, End); return false; } @@ -680,114 +609,51 @@ } } - if (Export) { - pushDirective(cxx_export_decl); - append("export "); - } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); - if (Id.Name == "module") - pushDirective(cxx_module_decl); + DirectiveKind Kind; + if (Id == "module") + Kind = Export ? cxx_export_module_decl : cxx_module_decl; else - pushDirective(cxx_import_decl); - append(Id.Name); - append(" "); - printToNewline(First, End); - append("\n"); - return false; -} - -bool Scanner::lexDefine(const char *&First, const char *const End) { - pushDirective(pp_define); - append("#define "); - skipWhitespace(First, End); + Kind = Export ? 
cxx_export_import_decl : cxx_import_decl; - if (!isAsciiIdentifierStart(*First)) - return reportError(First, diag::err_pp_macro_not_identifier); - - IdInfo Id = lexIdentifier(First, End); - const char *Last = Id.Last; - append(Id.Name); - if (Last == End) - return false; - if (*Last == '(') { - size_t Size = Out.size(); - if (printMacroArgs(Last, End)) { - // Be robust to bad macro arguments, since they can show up in disabled - // code. - Out.resize(Size); - append("(/* invalid */\n"); - skipLine(Last, End); - return false; - } - } - skipWhitespace(Last, End); - if (Last == End) - return false; - if (!isVerticalWhitespace(*Last)) - put(' '); - printDirectiveBody(Last, End); - First = Last; - return false; + return lexModuleDirectiveBody(Kind, First, End); } bool Scanner::lexPragma(const char *&First, const char *const End) { - // #pragma. - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - if (FoundId.Name == "once") { - // #pragma once - skipLine(First, End); - pushDirective(pp_pragma_once); - append("#pragma once\n"); - return false; - } - if (FoundId.Name == "push_macro") { - // #pragma push_macro - pushDirective(pp_pragma_push_macro); - append("#pragma push_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "pop_macro") { - // #pragma pop_macro - pushDirective(pp_pragma_pop_macro); - append("#pragma pop_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "include_alias") { - // #pragma include_alias - pushDirective(pp_pragma_include_alias); - append("#pragma include_alias"); - printDirectiveBody(First, End); + StringRef Id = FoundId.getValue(); + auto Kind = llvm::StringSwitch(Id) + .Case("once", pp_pragma_once) + .Case("push_macro", pp_pragma_push_macro) + .Case("pop_macro", pp_pragma_pop_macro) + 
.Case("include_alias", pp_pragma_include_alias) + .Default(pp_none); + if (Kind != pp_none) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); return false; } - if (FoundId.Name != "clang") { + if (Id != "clang") { skipLine(First, End); return false; } // #pragma clang. - if (!isNextIdentifier("module", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("module", First, End)) return false; - } // #pragma clang module. - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - } // #pragma clang module import. + lexPPDirectiveBody(First, End); pushDirective(pp_pragma_import); - append("#pragma clang module import "); - printDirectiveBody(First, End); return false; } @@ -808,14 +674,13 @@ return false; } - return lexDefault(pp_endif, "endif", First, End); + return lexDefault(pp_endif, First, End); } -bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End) { +bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, + const char *const End) { + lexPPDirectiveBody(First, End); pushDirective(Kind); - put('#').append(Directive).put(' '); - printDirectiveBody(First, End); return false; } @@ -845,6 +710,14 @@ return false; } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); + + auto ScEx1 = make_scope_exit([&]() { + /// Clear Scanner's CurDirToks before returning, in case we didn't push a + /// new directive. + CurDirToks.clear(); + }); + // Handle "@import". if (*First == '@') return lexAt(First, End); @@ -853,25 +726,26 @@ return lexModule(First, End); // Handle preprocessing directives. - ++First; // Skip over '#'. 
- skipWhitespace(First, End); - if (First == End) - return reportError(First, diag::err_pp_expected_eol); + TheLexer.setParsingPreprocessorDirective(true); + auto ScEx2 = make_scope_exit( + [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - if (!isAsciiIdentifierStart(*First)) { - skipLine(First, End); + // Lex '#'. + const dependency_directives_scan::Token &HashTok = lexToken(First, End); + assert(HashTok.is(tok::hash)); + (void)HashTok; + + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - } - // Figure out the token. - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = FoundId.getValue(); - if (Id.Name == "pragma") + if (Id == "pragma") return lexPragma(First, End); - auto Kind = llvm::StringSwitch(Id.Name) + auto Kind = llvm::StringSwitch(Id) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) .Case("define", pp_define) @@ -888,18 +762,26 @@ .Case("endif", pp_endif) .Default(pp_none); if (Kind == pp_none) { - skipDirective(Id.Name, First, End); + skipDirective(Id, First, End); return false; } if (Kind == pp_endif) return lexEndif(First, End); - if (Kind == pp_define) - return lexDefine(First, End); + switch (Kind) { + case pp_include: + case pp___include_macros: + case pp_include_next: + case pp_import: + lexIncludeFilename(First, End); + break; + default: + break; + } // Everything else. - return lexDefault(Kind, Id.Name, First, End); + return lexDefault(Kind, First, End); } static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { @@ -916,78 +798,65 @@ return false; } -bool Scanner::scan() { +bool Scanner::scan(SmallVectorImpl &Directives) { bool Error = scanImpl(Input.begin(), Input.end()); if (!Error) { - // Add a trailing newline and an EOF on success. - if (!Out.empty() && Out.back() != '\n') - Out.push_back('\n'); + // Add an EOF on success. pushDirective(pp_eof); } - // Null-terminate the output. 
This way the memory buffer that's passed to - // Clang will not have to worry about the terminating '\0'. - Out.push_back(0); - Out.pop_back(); + ArrayRef RemainingTokens = Tokens; + for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { + assert(RemainingTokens.size() >= DirWithToks.NumTokens); + Directives.emplace_back(DirWithToks.Kind, + RemainingTokens.take_front(DirWithToks.NumTokens)); + RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); + } + assert(RemainingTokens.empty()); + return Error; } -bool clang::dependency_directives_scan::computeSkippedRanges( - ArrayRef Input, llvm::SmallVectorImpl &Range) { - struct IfElseDirective { - enum DirectiveKind { - If, // if/ifdef/ifndef - Else // elif/elifdef/elifndef, else - }; - int Offset; - DirectiveKind Kind; +bool clang::scanSourceForDependencyDirectives( + StringRef Input, SmallVectorImpl &Tokens, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); +} + +void clang::printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef Directives, + llvm::raw_ostream &OS) { + // Add a space separator where it is convenient for testing purposes. 
+ auto needsSpaceSeparator = + [](tok::TokenKind Prev, + const dependency_directives_scan::Token &Tok) -> bool { + if (Prev == Tok.Kind) + return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::r_square); + if (Prev == tok::raw_identifier && + Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, + tok::char_constant, tok::header_name)) + return true; + if (Prev == tok::r_paren && + Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, + tok::char_constant, tok::unknown)) + return true; + if (Prev == tok::comma && + Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) + return true; + return false; }; - llvm::SmallVector Offsets; - for (const Directive &T : Input) { - switch (T.Kind) { - case pp_if: - case pp_ifdef: - case pp_ifndef: - Offsets.push_back({T.Offset, IfElseDirective::If}); - break; - - case pp_elif: - case pp_elifdef: - case pp_elifndef: - case pp_else: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - Offsets.push_back({T.Offset, IfElseDirective::Else}); - break; - } - case pp_endif: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - do { - IfElseDirective::DirectiveKind Kind = Offsets.pop_back_val().Kind; - if (Kind == IfElseDirective::If) - break; - } while (!Offsets.empty()); - break; - } - default: - break; + for (const dependency_directives_scan::Directive &Directive : Directives) { + Optional PrevTokenKind; + for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { + if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) + OS << ' '; + PrevTokenKind = Tok.Kind; + OS << Source.slice(Tok.Offset, Tok.getEnd()); } } - return false; -} - -bool clang::scanSourceForDependencyDirectives( - StringRef Input, SmallVectorImpl &Output, - SmallVectorImpl &Directives, DiagnosticsEngine *Diags, 
- SourceLocation InputSourceLoc) { - Output.clear(); - Directives.clear(); - return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan(); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -226,13 +226,11 @@ return L; } -bool Lexer::skipOver(unsigned NumBytes) { - IsAtPhysicalStartOfLine = true; - IsAtStartOfLine = true; - if ((BufferPtr + NumBytes) > BufferEnd) - return true; - BufferPtr += NumBytes; - return false; +void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) { + this->IsAtPhysicalStartOfLine = IsAtStartOfLine; + this->IsAtStartOfLine = IsAtStartOfLine; + assert((BufferStart + Offset) <= BufferEnd); + BufferPtr = BufferStart + Offset; } template static void StringifyImpl(T &Str, char Quote) { @@ -2939,6 +2937,13 @@ unsigned Lexer::isNextPPTokenLParen() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + if (isDependencyDirectivesLexer()) { + if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) + return 2; + return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::l_paren); + } + // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. @@ -3281,6 +3286,8 @@ } bool Lexer::Lex(Token &Result) { + assert(!isDependencyDirectivesLexer()); + // Start a new token. Result.startToken(); @@ -4102,3 +4109,129 @@ // We parsed the directive; lex a token with the new state. 
return false; } + +const char *Lexer::convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result) { + const char *TokPtr = BufferStart + DDTok.Offset; + Result.startToken(); + Result.setLocation(getSourceLocation(TokPtr)); + Result.setKind(DDTok.Kind); + Result.setFlag((Token::TokenFlags)DDTok.Flags); + Result.setLength(DDTok.Length); + BufferPtr = TokPtr + DDTok.Length; + return TokPtr; +} + +bool Lexer::LexDependencyDirectiveToken(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { + if (DepDirectives.front().Kind == pp_eof) + return LexEndOfFile(Result, BufferEnd); + NextDepDirectiveTokenIndex = 0; + DepDirectives = DepDirectives.drop_front(); + } + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++]; + + const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); + + if (Result.is(tok::hash) && Result.isAtStartOfLine()) { + PP->HandleDirective(Result); + return false; + } + if (Result.is(tok::raw_identifier)) { + Result.setRawIdentifierData(TokPtr); + if (!isLexingRawMode()) { + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (II->isHandleIdentifierCase()) + return PP->HandleIdentifier(Result); + } + return true; + } + if (Result.isLiteral()) { + Result.setLiteralData(TokPtr); + return true; + } + if (Result.is(tok::colon) && + (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) { + // Convert consecutive colons to 'tok::coloncolon'. 
+ if (*BufferPtr == ':') { + assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::colon)); + ++NextDepDirectiveTokenIndex; + Result.setKind(tok::coloncolon); + } + return true; + } + if (Result.is(tok::eod)) + ParsingPreprocessorDirective = false; + + return true; +} + +bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + bool Stop = false; + unsigned NestedIfs = 0; + do { + DepDirectives = DepDirectives.drop_front(); + switch (DepDirectives.front().Kind) { + case pp_none: + llvm_unreachable("unexpected 'pp_none'"); + case pp_include: + case pp___include_macros: + case pp_define: + case pp_undef: + case pp_import: + case pp_pragma_import: + case pp_pragma_once: + case pp_pragma_push_macro: + case pp_pragma_pop_macro: + case pp_pragma_include_alias: + case pp_include_next: + case decl_at_import: + case cxx_module_decl: + case cxx_import_decl: + case cxx_export_module_decl: + case cxx_export_import_decl: + break; + case pp_if: + case pp_ifdef: + case pp_ifndef: + ++NestedIfs; + break; + case pp_elif: + case pp_elifdef: + case pp_elifndef: + case pp_else: + if (!NestedIfs) { + Stop = true; + } + break; + case pp_endif: + if (!NestedIfs) { + Stop = true; + } else { + --NestedIfs; + } + break; + case pp_eof: + return LexEndOfFile(Result, BufferEnd); + } + } while (!Stop); + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens.front(); + assert(DDTok.is(tok::hash)); + NextDepDirectiveTokenIndex = 1; + + convertDependencyDirectiveToken(DDTok, Result); + return false; +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -443,41 +443,6 @@ return DiscardUntilEndOfDirective().getEnd(); } -Optional Preprocessor::getSkippedRangeForExcludedConditionalBlock( - SourceLocation HashLoc) { - if 
(!ExcludedConditionalDirectiveSkipMappings) - return None; - if (!HashLoc.isFileID()) - return None; - - std::pair HashFileOffset = - SourceMgr.getDecomposedLoc(HashLoc); - Optional Buf = - SourceMgr.getBufferOrNone(HashFileOffset.first); - if (!Buf) - return None; - auto It = - ExcludedConditionalDirectiveSkipMappings->find(Buf->getBufferStart()); - if (It == ExcludedConditionalDirectiveSkipMappings->end()) - return None; - - const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond(); - // Check if the offset of '#' is mapped in the skipped ranges. - auto MappingIt = SkippedRanges.find(HashFileOffset.second); - if (MappingIt == SkippedRanges.end()) - return None; - - unsigned BytesToSkip = MappingIt->getSecond(); - unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset(); - assert(CurLexerBufferOffset >= HashFileOffset.second && - "lexer is before the hash?"); - // Take into account the fact that the lexer has already advanced, so the - // number of bytes to skip must be adjusted. - unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; - assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); - return BytesToSkip - LengthDiff; -} - void Preprocessor::SuggestTypoedDirective(const Token &Tok, StringRef Directive, const SourceLocation &EndLoc) const { @@ -527,36 +492,42 @@ // disabling warnings, etc. CurPPLexer->LexingRawMode = true; Token Tok; - if (auto SkipLength = - getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) { - // Skip to the next '#endif' / '#else' / '#elif'. 
- CurLexer->skipOver(*SkipLength); - } SourceLocation endLoc; while (true) { - CurLexer->Lex(Tok); + if (CurLexer->isDependencyDirectivesLexer()) { + CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok); + } else { + while (true) { + CurLexer->Lex(Tok); - if (Tok.is(tok::code_completion)) { - setCodeCompletionReached(); - if (CodeComplete) - CodeComplete->CodeCompleteInConditionalExclusion(); - continue; - } + if (Tok.is(tok::code_completion)) { + setCodeCompletionReached(); + if (CodeComplete) + CodeComplete->CodeCompleteInConditionalExclusion(); + continue; + } - // If this is the end of the buffer, we have an error. - if (Tok.is(tok::eof)) { - // We don't emit errors for unterminated conditionals here, - // Lexer::LexEndOfFile can do that properly. - // Just return and let the caller lex after this #include. - if (PreambleConditionalStack.isRecording()) - PreambleConditionalStack.SkipInfo.emplace( - HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc); - break; - } + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // We don't emit errors for unterminated conditionals here, + // Lexer::LexEndOfFile can do that properly. + // Just return and let the caller lex after this #include. + if (PreambleConditionalStack.isRecording()) + PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc, + FoundNonSkipPortion, + FoundElse, ElseLoc); + break; + } - // If this token is not a preprocessor directive, just skip it. - if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) - continue; + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + break; + } + } + if (Tok.is(tok::eof)) + break; // We just parsed a # character at the start of a line, so we're in // directive mode. 
Tell the lexer this so any newlines we see will be diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -91,8 +91,19 @@ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset); } - EnterSourceFileWithLexer( - new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir); + Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); + if (getPreprocessorOpts().DependencyDirectivesForFile && + FID != PredefinesFileID) { + if (Optional File = SourceMgr.getFileEntryRefForID(FID)) { + if (Optional> + DepDirectives = + getPreprocessorOpts().DependencyDirectivesForFile(*File)) { + TheLexer->DepDirectives = *DepDirectives; + } + } + } + + EnterSourceFileWithLexer(TheLexer, CurDir); return false; } @@ -110,7 +121,9 @@ CurDirLookup = CurDir; CurLexerSubmodule = nullptr; if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_Lexer; + CurLexerKind = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -158,11 +158,6 @@ if (this->PPOpts->GeneratePreamble) PreambleConditionalStack.startRecording(); - ExcludedConditionalDirectiveSkipMappings = - this->PPOpts->ExcludedConditionalDirectiveSkipMappings; - if (ExcludedConditionalDirectiveSkipMappings) - ExcludedConditionalDirectiveSkipMappings->clear(); - MaxTokens = LangOpts.MaxTokens; } @@ -382,7 +377,9 @@ void Preprocessor::recomputeCurLexerKind() { if (CurLexer) - CurLexerKind = CLK_Lexer; + CurLexerKind = CurLexer->isDependencyDirectivesLexer() + ? 
CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) CurLexerKind = CLK_TokenLexer; else @@ -645,6 +642,9 @@ case CLK_CachingLexer: CachingLex(Tok); break; + case CLK_DependencyDirectivesLexer: + CurLexer->LexDependencyDirectiveToken(Tok); + break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Tok); break; @@ -906,6 +906,9 @@ CachingLex(Result); ReturnedToken = true; break; + case CLK_DependencyDirectivesLexer: + ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); + break; case CLK_LexAfterModuleImport: ReturnedToken = LexAfterModuleImport(Result); break; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -44,64 +43,41 @@ EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldScanForDirectives(Filename, Entry.getUniqueID())) - return EntryRef(/*Minimized=*/false, Filename, Entry); + !shouldScanForDirectives(Filename)) + return EntryRef(Filename, Entry); CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // Double-checked locking. 
- if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); std::lock_guard GuardLock(Contents->ValueLock); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); - llvm::SmallString<1024> MinimizedFileContents; - // Minimize the file down to directives that might affect the dependencies. - SmallVector Tokens; + SmallVector Directives; + // Scan the file for preprocessor directives that might affect the + // dependencies. if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), - MinimizedFileContents, Tokens)) { + Contents->DepDirectiveTokens, + Directives)) { + Contents->DepDirectiveTokens.clear(); // FIXME: Propagate the diagnostic if desired by the client. - // Use the original file if the minimization failed. - Contents->MinimizedStorage = - llvm::MemoryBuffer::getMemBuffer(*Contents->Original); - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + Contents->DepDirectives.store(new Optional()); + return EntryRef(Filename, Entry); } - // The contents produced by the minimizer must be null terminated. - assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && - "not null terminated contents"); - - // Compute the skipped PP ranges that speedup skipping over inactive - // preprocessor blocks. - llvm::SmallVector SkippedRanges; - dependency_directives_scan::computeSkippedRanges(Tokens, SkippedRanges); - PreprocessorSkippedRangeMapping Mapping; - for (const auto &Range : SkippedRanges) { - if (Range.Length < 16) { - // Ignore small ranges as non-profitable. - // FIXME: This is a heuristic, its worth investigating the tradeoffs - // when it should be applied. 
- continue; - } - Mapping[Range.Offset] = Range.Length; - } - Contents->PPSkippedRangeMapping = std::move(Mapping); - - Contents->MinimizedStorage = std::make_unique( - std::move(MinimizedFileContents)); - // This function performed double-checked locking using `MinimizedAccess`. - // Assigning it must be the last thing this function does. If we were to - // assign it before `PPSkippedRangeMapping`, other threads may skip the - // critical section (`MinimizedAccess != nullptr`) and access the mappings - // that are about to be initialized, leading to a data race. - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + // This function performed double-checked locking using `DepDirectives`. + // Assigning it must be the last thing this function does, otherwise other + // threads may skip the + // critical section (`DepDirectives != nullptr`), leading to a data race. + Contents->DepDirectives.store( + new Optional(std::move(Directives))); + return EntryRef(Filename, Entry); } DependencyScanningFilesystemSharedCache:: @@ -208,19 +184,9 @@ return shouldScanForDirectivesBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::disableDirectivesScanning( - StringRef Filename) { - // Since we're not done setting up `NotToBeScanned` yet, we need to disable - // directive scanning explicitly. 
- if (llvm::ErrorOr Result = getOrCreateFileSystemEntry( - Filename, /*DisableDirectivesScanning=*/true)) - NotToBeScanned.insert(Result->getStatus().getUniqueID()); -} - bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( - StringRef Filename, llvm::sys::fs::UniqueID UID) { - return shouldScanForDirectivesBasedOnExtension(Filename) && - !NotToBeScanned.contains(UID); + StringRef Filename) { + return shouldScanForDirectivesBasedOnExtension(Filename); } const CachedFileSystemEntry & @@ -307,9 +273,7 @@ llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} - static llvm::ErrorOr> - create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings); + static llvm::ErrorOr> create(EntryRef Entry); llvm::ErrorOr status() override { return Stat; } @@ -329,8 +293,7 @@ } // end anonymous namespace llvm::ErrorOr> -DepScanFile::create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { +DepScanFile::create(EntryRef Entry) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) @@ -342,10 +305,6 @@ /*RequiresNullTerminator=*/false), Entry.getStatus()); - const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); - if (EntrySkipMappings && !EntrySkipMappings->empty()) - PPSkipMappings[Result->Buffer->getBufferStart()] = EntrySkipMappings; - return llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } @@ -358,5 +317,5 @@ llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return DepScanFile::create(Result.get(), PPSkipMappings); + return DepScanFile::create(Result.get()); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -137,12 +137,11 @@ DependencyScanningAction( StringRef WorkingDirectory, 
DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings, ScanningOutputFormat Format, bool OptimizeArgs, llvm::Optional ModuleName = None) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), - DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), - OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {} + DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs), + ModuleName(ModuleName) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -183,29 +182,21 @@ // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - DepFS->enableDirectivesScanningOfAllFiles(); - // Don't minimize any files that contributed to prebuilt modules. The - // implicit build validates the modules by comparing the reported sizes of - // their inputs to the current state of the filesystem. Minimization would - // throw this mechanism off. - for (const auto &File : PrebuiltModulesInputFiles) - DepFS->disableDirectivesScanning(File.getKey()); - // Don't minimize any files that were explicitly passed in the build - // settings and that might be opened. - for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->disableDirectivesScanning(E.Path); - for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->disableDirectivesScanning(F); - // Support for virtual file system overlays on top of the caching // filesystem. FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation( ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS)); - // Pass the skip mappings which should speed up excluded conditional block - // skipping in the preprocessor. 
- ScanInstance.getPreprocessorOpts() - .ExcludedConditionalDirectiveSkipMappings = &PPSkipMappings; + llvm::IntrusiveRefCntPtr LocalDepFS = + DepFS; + ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile = + [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File) + -> Optional> { + if (llvm::ErrorOr Entry = + LocalDepFS->getOrCreateFileSystemEntry(File.getName())) + return Entry->getDirectiveTokens(); + return None; + }; } // Create the dependency collector that will collect the produced @@ -262,7 +253,6 @@ StringRef WorkingDirectory; DependencyConsumer &Consumer; llvm::IntrusiveRefCntPtr DepFS; - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; ScanningOutputFormat Format; bool OptimizeArgs; llvm::Optional ModuleName; @@ -289,7 +279,7 @@ if (Service.getMode() == ScanningMode::DependencyDirectivesScan) DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(), - RealFS, PPSkipMappings); + RealFS); if (Service.canReuseFileManager()) Files = new FileManager(FileSystemOptions(), RealFS); } @@ -340,8 +330,8 @@ return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(), [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) { DependencyScanningAction Action( - WorkingDirectory, Consumer, DepFS, PPSkipMappings, - Format, OptimizeArgs, ModuleName); + WorkingDirectory, Consumer, DepFS, Format, + OptimizeArgs, ModuleName); // Create an invocation that uses the underlying file // system to ensure that any file system requests that // are made by the driver do not go through the diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -1,3 +1,4 @@ -// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 +// RUN: 
%clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s -#define 0 0 // expected-error {{macro name must be an identifier}} +#define 0 0 +// CHECK: #define 0 0 diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c @@ -15,7 +15,7 @@ #pragma include_alias(, "mystring.h") // CHECK: #pragma once -// CHECK-NEXT: #pragma push_macro( "MYMACRO" ) +// CHECK-NEXT: #pragma push_macro("MYMACRO") // CHECK-NEXT: #pragma pop_macro("MYMACRO") // CHECK-NEXT: #pragma clang module import mymodule // CHECK-NEXT: #pragma include_alias(, "mystring.h") diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -14,39 +14,58 @@ using namespace clang; using namespace clang::dependency_directives_scan; -static bool minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out) { - SmallVector Directives; - return scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl &Out, + SmallVectorImpl &Tokens, + SmallVectorImpl &Directives) { + Out.clear(); + Tokens.clear(); + Directives.clear(); + if (scanSourceForDependencyDirectives(Input, Tokens, Directives)) + return true; + + raw_svector_ostream OS(Out); + printDependencyDirectivesAsSource(Input, Directives, OS); + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + Out.push_back('\0'); + Out.pop_back(); + + return false; } -static bool -minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out, - SmallVectorImpl &Directives) { - return 
scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Tokens; + SmallVector Directives; + return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives); } namespace { TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("", Out, Tokens, Directives)); EXPECT_TRUE(Out.empty()); + EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens, + Directives)); EXPECT_TRUE(Out.empty()); + EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); } -TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { +TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; ASSERT_FALSE( @@ -71,7 +90,7 @@ "#pragma include_alias(, )\n" "export module m;\n" "import m;\n", - Out, Directives)); + Out, Tokens, Directives)); EXPECT_EQ(pp_define, Directives[0].Kind); EXPECT_EQ(pp_undef, Directives[1].Kind); EXPECT_EQ(pp_endif, Directives[2].Kind); @@ -91,19 +110,28 @@ EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind); EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind); EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind); - EXPECT_EQ(cxx_export_decl, Directives[19].Kind); - EXPECT_EQ(cxx_module_decl, Directives[20].Kind); - EXPECT_EQ(cxx_import_decl, Directives[21].Kind); - EXPECT_EQ(pp_eof, Directives[22].Kind); + EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind); + EXPECT_EQ(cxx_import_decl, Directives[20].Kind); + EXPECT_EQ(pp_eof, 
Directives[21].Kind); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, EmptyHash) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#\n#define MACRO a\n", Out)); + EXPECT_STREQ("#define MACRO a\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out, + Tokens, Directives)); EXPECT_STREQ("#define MACRO\n", Out.data()); + ASSERT_EQ(4u, Tokens.size()); ASSERT_EQ(2u, Directives.size()); ASSERT_EQ(pp_define, Directives.front().Kind); } @@ -144,25 +172,25 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO con tent ", Out)); - EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO() con tent ", Out)); - EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO((a))\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(\n", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(a*b)\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { @@ -170,19 +198,19 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\t)\tcon \t tent\t", Out)); 
- EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\f)\fcon \f tent\f", Out)); - EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\v)\vcon \v tent\v", Out)); - EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out)); - EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) { @@ -255,25 +283,27 @@ TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND&\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" "&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND\\\n" + "&\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) { @@ -303,6 +333,14 @@ Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CommentSlashSlashStar) { + SmallVector Out; + + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO 1 //* blah */\n", Out)); + EXPECT_STREQ("#define MACRO 1\n", Out.data()); +} + TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) { SmallVector Out; @@ -481,6 +519,9 @@ ASSERT_FALSE( minimizeSourceToDependencyDirectives("#__include_macros \n", Out)); EXPECT_STREQ("#__include_macros \n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out)); + EXPECT_STREQ("#include MACRO\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { @@ -507,8 +548,9 @@ SmallVector Out; ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { @@ -559,7 +601,8 @@ "#define GUARD\n" "#endif\n", Out)); - EXPECT_STREQ("#ifndef GUARD\n" + EXPECT_STREQ("#if\\\n" + "ndef GUARD\n" "#define GUARD\n" "#endif\n", Out.data()); @@ -567,12 +610,16 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\n" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\r" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" " RD\n", @@ -588,7 +635,10 @@ "2 + \\\t\n" "3\n", Out)); - EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data()); + EXPECT_STREQ("#define A 1+\\ \n" + "2+\\\t\n" + "3\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { @@ -682,6 +732,7 @@ 
TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"(// comment @@ -689,7 +740,8 @@ // another comment #include )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); EXPECT_STREQ("#pragma once\n#include \n", Out.data()); ASSERT_EQ(Directives.size(), 3u); EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once); @@ -700,7 +752,7 @@ #include )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#pragma once\n#include \n", Out.data()); + EXPECT_STREQ("#pragma once extra tokens\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -755,11 +807,12 @@ Source = "#define X \"\\ \r\nx\n#include \n"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X \"\\ \r\nx\n#include \n", Out.data()); + EXPECT_STREQ("#define X\"\\ \r\nx\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"( @@ -789,81 +842,17 @@ import f(->a = 3); } )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n" - "export import :l [[rename]];\n" - "import <<= 3;\nimport a b d e d e f e;\n" - "import foo [[no_unique_address]];\nimport foo();\n" - "import f(:sefse);\nimport f(->a = 3);\n", + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); + EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" + "exp\\\nort import:l[[rename]];" + "import<<=3;import a b d e d e f e;" + "import foo[[no_unique_address]];import foo();" + "import f(:sefse);import f(->a=3);\n", Out.data()); - ASSERT_EQ(Directives.size(), 12u); - 
EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include); - EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 1u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifdef BLAH\n" - "void skip();\n" - "#elifdef BLAM\n" - "void skip();\n" - "#elifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 3u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset + Ranges[2].Length, (int)Out.rfind("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "#if FOO\n" - "#include hello\n" - "#elif BAR\n" - "#include bye\n" - "#endif\n" - "#else\n" - "#include nothing\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - 
EXPECT_EQ(Ranges.size(), 4u); - EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); - EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif")); - EXPECT_EQ(Ranges[2].Offset, 0); - EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif")); + ASSERT_EQ(Directives.size(), 10u); + EXPECT_EQ(Directives[0].Kind, pp_include); + EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); } } // end anonymous namespace diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -204,53 +204,5 @@ EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h"); } -namespace dependencies { -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity. 
- auto StatusMinimized0 = DepFS.status("/mod.h"); - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusMinimized0); - EXPECT_TRUE(StatusFull1); - EXPECT_EQ(StatusMinimized0->getSize(), 17u); - EXPECT_EQ(StatusFull1->getSize(), 30u); - EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h")); -} - -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull0 = DepFS.status("/mod.h"); - DepFS.enableDirectivesScanningOfAllFiles(); - auto StatusMinimized1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusFull0); - EXPECT_TRUE(StatusMinimized1); - EXPECT_EQ(StatusFull0->getSize(), 30u); - EXPECT_EQ(StatusMinimized1->getSize(), 17u); - EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h")); -} - -} // end namespace dependencies } // end namespace tooling } // end namespace clang