diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -876,11 +876,11 @@
 }
 
-let CategoryName = "Dependency Directive Source Minimization Issue" in {
+let CategoryName = "Dependency Directive Source Scanner Issue" in {
 
-def err_dep_source_minimizer_missing_sema_after_at_import : Error<
+def err_dep_source_scanner_missing_semi_after_at_import : Error<
   "could not find ';' after @import">;
-def err_dep_source_minimizer_unexpected_tokens_at_import : Error<
+def err_dep_source_scanner_unexpected_tokens_at_import : Error<
   "unexpected extra tokens at end of @import declaration">;
 
 }
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
new file
--- /dev/null
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -0,0 +1,136 @@
+//===- clang/Lex/DependencyDirectivesScanner.h ---------------------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the interface for scanning header and source files to get the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
+#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace clang {
+
+namespace tok {
+enum TokenKind : unsigned short;
+}
+
+class DiagnosticsEngine;
+
+namespace dependency_directives_scan {
+
+/// Token lexed as part of dependency directive scanning.
+struct Token {
+  /// Offset into the original source input.
+  unsigned Offset;
+  unsigned Length;
+  tok::TokenKind Kind;
+  unsigned short Flags;
+
+  Token(unsigned Offset, unsigned Length, tok::TokenKind Kind,
+        unsigned short Flags)
+      : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {}
+
+  unsigned getEnd() const { return Offset + Length; }
+
+  bool is(tok::TokenKind K) const { return Kind == K; }
+  bool isNot(tok::TokenKind K) const { return Kind != K; }
+  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+    return is(K1) || is(K2);
+  }
+  template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
+    return is(K1) || isOneOf(Ks...);
+  }
+};
+
+/// Represents the kind of preprocessor directive or a module declaration that
+/// is tracked by the scanner in its token output.
+enum DirectiveKind : uint8_t {
+  pp_none,
+  pp_include,
+  pp___include_macros,
+  pp_define,
+  pp_undef,
+  pp_import,
+  pp_pragma_import,
+  pp_pragma_once,
+  pp_pragma_push_macro,
+  pp_pragma_pop_macro,
+  pp_pragma_include_alias,
+  pp_include_next,
+  pp_if,
+  pp_ifdef,
+  pp_ifndef,
+  pp_elif,
+  pp_elifdef,
+  pp_elifndef,
+  pp_else,
+  pp_endif,
+  decl_at_import,
+  cxx_module_decl,
+  cxx_import_decl,
+  cxx_export_module_decl,
+  cxx_export_import_decl,
+  pp_eof,
+};
+
+/// Represents a directive that's lexed as part of the dependency directives
+/// scanning. It's used to track various preprocessor directives that could
+/// potentially have an effect on the dependencies.
+struct Directive {
+  ArrayRef<Token> Tokens;
+
+  /// The kind of directive.
+  DirectiveKind Kind = pp_none;
+
+  Directive() = default;
+  Directive(DirectiveKind K, ArrayRef<Token> Tokens)
+      : Tokens(Tokens), Kind(K) {}
+};
+
+} // end namespace dependency_directives_scan
+
+/// Scan the input for the preprocessor directives that might have
+/// an effect on the dependencies for a compilation unit.
+///
+/// This function ignores all non-preprocessor code and anything that
+/// can't affect what gets included.
+///
+/// \returns false on success, true on error. If the diagnostic engine is not
+/// null, an appropriate error is reported using the given input location
+/// with the offset that corresponds to the \p Input buffer offset.
+bool scanSourceForDependencyDirectives(
+    StringRef Input,
+    SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+    SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
+    DiagnosticsEngine *Diags = nullptr,
+    SourceLocation InputSourceLoc = SourceLocation());
+
+/// Print the previously scanned dependency directives as minimized source text.
+///
+/// \param Source The original source text that the dependency directives were
+/// scanned from.
+/// \param Directives The previously scanned dependency directives.
+/// \param OS the stream to print the dependency directives on.
+///
+/// This is used primarily for testing purposes; during dependency scanning the
+/// \p Lexer uses the tokens directly, not their printed version.
+void printDependencyDirectivesAsSource(
+    StringRef Source,
+    ArrayRef<dependency_directives_scan::Directive> Directives,
+    llvm::raw_ostream &OS);
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
deleted file
--- a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
+++ /dev/null
@@ -1,115 +0,0 @@
-//===- clang/Lex/DependencyDirectivesSourceMinimizer.h - ----------*- C++ -*-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This is the interface for minimizing header and source files to the
-/// minimum necessary preprocessor directives for evaluating includes. It
-/// reduces the source down to #define, #include, #import, @import, and any
-/// conditional preprocessor logic that contains one of those.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H
-#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H
-
-#include "clang/Basic/SourceLocation.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace clang {
-
-class DiagnosticsEngine;
-
-namespace minimize_source_to_dependency_directives {
-
-/// Represents the kind of preprocessor directive or a module declaration that
-/// is tracked by the source minimizer in its token output.
-enum TokenKind { - pp_none, - pp_include, - pp___include_macros, - pp_define, - pp_undef, - pp_import, - pp_pragma_import, - pp_pragma_once, - pp_pragma_push_macro, - pp_pragma_pop_macro, - pp_pragma_include_alias, - pp_include_next, - pp_if, - pp_ifdef, - pp_ifndef, - pp_elif, - pp_elifdef, - pp_elifndef, - pp_else, - pp_endif, - decl_at_import, - cxx_export_decl, - cxx_module_decl, - cxx_import_decl, - pp_eof, -}; - -/// Represents a simplified token that's lexed as part of the source -/// minimization. It's used to track the location of various preprocessor -/// directives that could potentially have an effect on the depedencies. -struct Token { - /// The kind of token. - TokenKind K = pp_none; - - /// Offset into the output byte stream of where the directive begins. - int Offset = -1; - - Token(TokenKind K, int Offset) : K(K), Offset(Offset) {} -}; - -/// Simplified token range to track the range of a potentially skippable PP -/// directive. -struct SkippedRange { - /// Offset into the output byte stream of where the skipped directive begins. - int Offset; - - /// The number of bytes that can be skipped before the preprocessing must - /// resume. - int Length; -}; - -/// Computes the potential source ranges that can be skipped by the preprocessor -/// when skipping a directive like #if, #ifdef or #elsif. -/// -/// \returns false on success, true on error. -bool computeSkippedRanges(ArrayRef Input, - llvm::SmallVectorImpl &Range); - -} // end namespace minimize_source_to_dependency_directives - -/// Minimize the input down to the preprocessor directives that might have -/// an effect on the dependencies for a compilation unit. -/// -/// This function deletes all non-preprocessor code, and strips anything that -/// can't affect what gets included. It canonicalizes whitespace where -/// convenient to stabilize the output against formatting changes in the input. -/// -/// Clears the output vectors at the beginning of the call. -/// -/// \returns false on success, true on error. If the diagnostic engine is not -/// null, an appropriate error is reported using the given input location -/// with the offset that corresponds to the minimizer's current buffer offset. -bool minimizeSourceToDependencyDirectives( - llvm::StringRef Input, llvm::SmallVectorImpl &Output, - llvm::SmallVectorImpl - &Tokens, - DiagnosticsEngine *Diags = nullptr, - SourceLocation InputSourceLoc = SourceLocation()); - -} // end namespace clang - -#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSOURCEMINIMIZER_H diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -16,6 +16,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/PreprocessorLexer.h" #include "clang/Lex/Token.h" #include "llvm/ADT/Optional.h" @@ -149,6 +150,13 @@ // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState; + /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer(). + ArrayRef DepDirectives; + + /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the + /// next token to use from the current dependency directive. + unsigned NextDepDirectiveTokenIndex = 0; + void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); public: @@ -195,6 +203,23 @@ /// return the tok::eof token. 
This implicitly involves the preprocessor. bool Lex(Token &Result); + /// Called when the preprocessor is in 'dependency scanning lexing mode'. + bool LexDependencyDirectiveToken(Token &Result); + + /// Called when the preprocessor is in 'dependency scanning lexing mode' and + /// is skipping a conditional block. + bool LexDependencyDirectiveTokenWhileSkipping(Token &Result); + + /// True when the preprocessor is in 'dependency scanning lexing mode' and + /// created this \p Lexer for lexing a set of dependency directive tokens. + bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); } + + /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to + /// the position just after the token. + /// \returns the buffer pointer at the beginning of the token. + const char *convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result); + public: /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. bool isPragmaLexer() const { return Is_PragmaLexer; } @@ -288,14 +313,8 @@ return BufferPtr - BufferStart; } - /// Skip over \p NumBytes bytes. - /// - /// If the skip is successful, the next token will be lexed from the new - /// offset. The lexer also assumes that we skipped to the start of the line. - /// - /// \returns true if the skip failed (new offset would have been past the - /// end of the buffer), false otherwise. - bool skipOver(unsigned NumBytes); + /// Set the lexer's buffer pointer to \p Offset. + void seek(unsigned Offset, bool IsAtStartOfLine); /// Stringify - Convert the specified string into a C string by i) escaping /// '\\' and " characters and ii) replacing newline character(s) with "\\n". diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,7 +29,6 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -558,6 +557,7 @@ CLK_Lexer, CLK_TokenLexer, CLK_CachingLexer, + CLK_DependencyDirectivesLexer, CLK_LexAfterModuleImport } CurLexerKind = CLK_Lexer; @@ -2584,14 +2584,6 @@ void emitMacroDeprecationWarning(const Token &Identifier) const; void emitRestrictExpansionWarning(const Token &Identifier) const; void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; - - Optional - getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); - - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings; }; /// Abstract base class that describes a handler that will receive diff --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h deleted file mode 100644 --- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H -#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace clang { - -/// A mapping from an offset into a buffer to the number of bytes that can be -/// skipped by the preprocessor when skipping over excluded conditional -/// directive ranges. -using PreprocessorSkippedRangeMapping = llvm::DenseMap; - -/// The datastructure that holds the mapping between the active memory buffers -/// and the individual skip mappings. -using ExcludedPreprocessorDirectiveSkipMapping = - llvm::DenseMap; - -} // end namespace clang - -#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -11,7 +11,7 @@ #include "clang/Basic/BitmaskEnum.h" #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include @@ -200,13 +200,18 @@ /// build it again. std::shared_ptr FailedModules; - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. + /// Function for getting the dependency preprocessor directives of a file. /// - /// The pointer is passed to the Preprocessor when it's constructed. The - /// pointer is unowned, the client is responsible for its lifetime. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings = nullptr; + /// These are directives derived from a special form of lexing where the + /// source input is scanned for the preprocessor directives that might have an + /// effect on the dependencies for a compilation unit. + /// + /// Enables a client to cache the directives for a file and provide them + /// across multiple compiler invocations. + /// FIXME: Allow returning an error. + std::function>( + FileEntryRef)> + DependencyDirectivesForFile; /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -10,7 +10,7 @@ #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" @@ -22,32 +22,34 @@ namespace tooling { namespace dependencies { -/// Original and minimized contents of a cached file entry. Single instance can +using DependencyDirectivesTy = + SmallVector; + +/// Contents and directive tokens of a cached file entry. Single instance can /// be shared between multiple entries. 
struct CachedFileContents { - CachedFileContents(std::unique_ptr Original) - : Original(std::move(Original)), MinimizedAccess(nullptr) {} + CachedFileContents(std::unique_ptr Contents) + : Contents(std::move(Contents)), DepDirectives(nullptr) {} - /// Owning storage for the minimized contents. - std::unique_ptr Original; + /// Owning storage for the file contents. + std::unique_ptr Contents; - /// The mutex that must be locked before mutating minimized contents. + /// The mutex that must be locked before mutating directive tokens. std::mutex ValueLock; - /// Owning storage for the minimized contents. - std::unique_ptr MinimizedStorage; - /// Accessor to the minimized contents that's atomic to avoid data races. - std::atomic MinimizedAccess; - /// Skipped range mapping of the minimized contents. - /// This is initialized iff `MinimizedAccess != nullptr`. - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + SmallVector DepDirectiveTokens; + /// Accessor to the directive tokens that's atomic to avoid data races. + /// \p CachedFileContents has ownership of the pointer. + std::atomic *> DepDirectives; + + ~CachedFileContents() { delete DepDirectives.load(); } }; /// An in-memory representation of a file system entity that is of interest to /// the dependency scanning filesystem. /// /// It represents one of the following: -/// - opened file with original contents and a stat value, -/// - opened file with original contents, minimized contents and a stat value, +/// - opened file with contents and a stat value, +/// - opened file with contents, directive tokens and a stat value, /// - directory entry with its stat value, /// - filesystem error. /// @@ -76,22 +78,12 @@ /// \returns True if the current entry represents a directory. bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } - /// \returns Original contents of the file. - StringRef getOriginalContents() const { - assert(!isError() && "error"); - assert(!MaybeStat->isDirectory() && "not a file"); - assert(Contents && "contents not initialized"); - return Contents->Original->getBuffer(); - } - - /// \returns Minimized contents of the file. - StringRef getMinimizedContents() const { + /// \returns Contents of the file. + StringRef getContents() const { assert(!isError() && "error"); assert(!MaybeStat->isDirectory() && "not a file"); assert(Contents && "contents not initialized"); - llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); - assert(Buffer && "not minimized"); - return Buffer->getBuffer(); + return Contents->Contents->getBuffer(); } /// \returns The error. @@ -110,17 +102,23 @@ return MaybeStat->getUniqueID(); } - /// \returns The mapping between location -> distance that is used to speed up - /// the block skipping in the preprocessor. - const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { + /// \returns The scanned preprocessor directive tokens of the file that are + /// used to speed up preprocessing, if available. + Optional> + getDirectiveTokens() const { assert(!isError() && "error"); assert(!isDirectory() && "not a file"); assert(Contents && "contents not initialized"); - return Contents->PPSkippedRangeMapping; + if (auto *Directives = Contents->DepDirectives.load()) { + if (Directives->hasValue()) + return ArrayRef( + Directives->getValue()); + } + return None; } - /// \returns The data structure holding both original and minimized contents. - CachedFileContents *getContents() const { + /// \returns The data structure holding both contents and directive tokens. 
+ CachedFileContents *getCachedContents() const { assert(!isError() && "error"); assert(!isDirectory() && "not a file"); return Contents; @@ -145,7 +143,7 @@ }; /// This class is a shared cache, that caches the 'stat' and 'open' calls to the -/// underlying real file system. It distinguishes between minimized and original +/// underlying real file system, and the scanned preprocessor directives of /// files. /// /// It is sharded based on the hash of the key to reduce the lock contention for @@ -210,8 +208,7 @@ }; /// This class is a local cache, that caches the 'stat' and 'open' calls to the -/// underlying real file system. It distinguishes between minimized and original -/// files. +/// underlying real file system. class DependencyScanningFilesystemLocalCache { llvm::StringMap Cache; @@ -234,14 +231,9 @@ }; /// Reference to a CachedFileSystemEntry. -/// If the underlying entry is an opened file, this wrapper returns the correct -/// contents (original or minimized) and ensures consistency with file size -/// reported by status. +/// If the underlying entry is an opened file, this wrapper returns the file +/// contents and the scanned preprocessor directives. class EntryRef { - /// For entry that is an opened file, this bit signifies whether its contents - /// are minimized. - bool Minimized; - /// The filename used to access this entry. std::string Filename; @@ -249,8 +241,8 @@ const CachedFileSystemEntry &Entry; public: - EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry) - : Minimized(Minimized), Filename(Name), Entry(Entry) {} + EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) + : Filename(Name), Entry(Entry) {} llvm::vfs::Status getStatus() const { llvm::vfs::Status Stat = Entry.getStatus(); @@ -269,13 +261,11 @@ return *this; } - StringRef getContents() const { - return Minimized ? Entry.getMinimizedContents() - : Entry.getOriginalContents(); - } + StringRef getContents() const { return Entry.getContents(); } - const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { - return Minimized ? &Entry.getPPSkippedRangeMapping() : nullptr; + Optional> + getDirectiveTokens() const { + return Entry.getDirectiveTokens(); } }; @@ -292,23 +282,17 @@ public: DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, - IntrusiveRefCntPtr FS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), - PPSkipMappings(PPSkipMappings) {} + IntrusiveRefCntPtr FS) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} llvm::ErrorOr status(const Twine &Path) override; llvm::ErrorOr> openFileForRead(const Twine &Path) override; - /// Disable minimization of the given file. - void disableMinimization(StringRef Filename); - /// Enable minimization of all files. - void enableMinimizationOfAllFiles() { NotToBeMinimized.clear(); } - -private: - /// Check whether the file should be minimized. - bool shouldMinimize(StringRef Filename, llvm::sys::fs::UniqueID UID); + /// Disable directives scanning of the given file. + void disableDirectivesScanning(StringRef Filename); + /// Enable directives scanning of all files. + void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); } /// Returns entry for the given filename. /// @@ -316,7 +300,11 @@ /// using the underlying filesystem. 
llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename, - bool DisableMinimization = false); + bool DisableDirectivesScanning = false); + +private: + /// Check whether the file should be scanned for preprocessor directives. + bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID); /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the @@ -324,10 +312,10 @@ llvm::ErrorOr computeAndStoreResult(StringRef Filename); - /// Minimizes the given entry if necessary and returns a wrapper object with - /// reference semantics. - EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry, - StringRef Filename, bool Disable); + /// Scan for preprocessor directives for the given entry if necessary and + /// returns a wrapper object with reference semantics. + EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry, + StringRef Filename, bool Disable); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. @@ -398,12 +386,8 @@ /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. DependencyScanningFilesystemLocalCache LocalCache; - /// The mapping structure which records information about the - /// excluded conditional directive skip mappings that are used by the - /// currently active preprocessor. - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; - /// The set of files that should not be minimized. - llvm::DenseSet NotToBeMinimized; + /// The set of files that should not be scanned for PP directives. + llvm::DenseSet NotToBeScanned; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -19,15 +19,13 @@ /// dependencies. enum class ScanningMode { /// This mode is used to compute the dependencies by running the preprocessor - /// over - /// the unmodified source files. + /// over the source files. CanonicalPreprocessing, /// This mode is used to compute the dependencies by running the preprocessor - /// over - /// the source files that have been minimized to contents that might affect - /// the dependencies. - MinimizedSourcePreprocessing + /// with special kind of lexing after scanning header and source files to get + /// the minimum necessary preprocessor directives for evaluating includes. + DependencyDirectivesScan, }; /// The format that is output by the dependency scanner. 
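An illustrative sketch (not part of the patch itself): the snippet below drives the new scanner API from clang/Lex/DependencyDirectivesScanner.h directly and prints the collected directives back as source text, which is essentially what the DependencyDirectivesScan mode above arranges per input file and what the FrontendActions.cpp change later in this patch does for testing. The helper name dumpDirectives and the use of llvm::outs() are assumptions made only for this sketch.

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

// Scan a source buffer for dependency directives and print them back as
// source text (the printed form is intended mainly for tests).
static void dumpDirectives(llvm::StringRef Input) {
  llvm::SmallVector<clang::dependency_directives_scan::Token, 64> Tokens;
  llvm::SmallVector<clang::dependency_directives_scan::Directive, 32> Directives;
  // Returns true on error; without a DiagnosticsEngine the failure is silent.
  if (clang::scanSourceForDependencyDirectives(Input, Tokens, Directives))
    return;
  clang::printDependencyDirectivesAsSource(Input, Directives, llvm::outs());
}

A DiagnosticsEngine and an input SourceLocation can be passed as the optional trailing arguments when scan errors (for example a malformed @import, per the renamed diagnostics above) should be reported instead of silently dropped.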
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -13,7 +13,6 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "llvm/Support/Error.h" @@ -69,7 +68,6 @@ private: std::shared_ptr PCHContainerOps; - ExcludedPreprocessorDirectiveSkipMapping PPSkipMappings; /// The physical filesystem overlaid by `InMemoryFS`. llvm::IntrusiveRefCntPtr RealFS; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -18,7 +18,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/Utils.h" -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" @@ -1155,10 +1155,10 @@ SourceManager &SM = CI.getPreprocessor().getSourceManager(); llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID()); - llvm::SmallString<1024> Output; - llvm::SmallVector Toks; - if (minimizeSourceToDependencyDirectives( - FromFile.getBuffer(), Output, Toks, &CI.getDiagnostics(), + llvm::SmallVector Tokens; + llvm::SmallVector Directives; + if (scanSourceForDependencyDirectives( + FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) { assert(CI.getDiagnostics().hasErrorOccurred() && "no errors reported for failure"); @@ -1177,7 +1177,8 @@ } return; } - llvm::outs() << Output; + printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives, + llvm::outs()); } void GetDependenciesByModuleNameAction::ExecuteAction() { diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt --- a/clang/lib/Lex/CMakeLists.txt +++ b/clang/lib/Lex/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangLex - DependencyDirectivesSourceMinimizer.cpp + DependencyDirectivesScanner.cpp HeaderMap.cpp HeaderSearch.cpp InitHeaderSearch.cpp diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -0,0 +1,852 @@ +//===- DependencyDirectivesScanner.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the interface for scanning header and source files to get the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. 
+/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesScanner.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace clang; +using namespace clang::dependency_directives_scan; +using namespace llvm; + +namespace { + +struct DirectiveWithTokens { + DirectiveKind Kind; + unsigned NumTokens; + + DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) + : Kind(Kind), NumTokens(NumTokens) {} +}; + +/// Does an efficient "scan" of the sources to detect the presence of +/// preprocessor (or module import) directives and collects the raw lexed tokens +/// for those directives so that the \p Lexer can "replay" them when the file is +/// included. +/// +/// Note that the behavior of the raw lexer is affected by the language mode, +/// while at this point we want to do a scan and collect tokens once, +/// irrespective of the language mode that the file will get included in. To +/// compensate for that the \p Lexer, while "replaying", will adjust a token +/// where appropriate, when it could affect the preprocessor's state. +/// For example in a directive like +/// +/// \code +/// #if __has_cpp_attribute(clang::fallthrough) +/// \endcode +/// +/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 +/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' +/// while in C++ mode. +struct Scanner { + Scanner(StringRef Input, + SmallVectorImpl &Tokens, + DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) + : Input(Input), Tokens(Tokens), Diags(Diags), + InputSourceLoc(InputSourceLoc), + TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), + Input.end()) { + // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. + LangOpts.ObjC = true; + } + + /// Lex the provided source and emit the directive tokens. + /// + /// \returns True on error. + bool scan(SmallVectorImpl &Directives); + +private: + /// Lexes next token and advances \p First and the \p Lexer. + LLVM_NODISCARD dependency_directives_scan::Token & + lexToken(const char *&First, const char *const End); + + dependency_directives_scan::Token &lexIncludeFilename(const char *&First, + const char *const End); + + /// Lexes next token and if it is identifier returns its string, otherwise + /// returns \p None. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + LLVM_NODISCARD Optional tryLexIdentifier(const char *&First, + const char *const End); + + /// Used when it is certain that next token is an identifier. + LLVM_NODISCARD StringRef lexIdentifier(const char *&First, + const char *const End); + + /// Lexes next token and returns true iff it is an identifier that matches \p + /// Id. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. 
+ LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, + const char *const End); + + LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); + LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First, + const char *const End); + LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); + LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First, + const char *const End); + LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind, + const char *&First, + const char *const End); + void lexPPDirectiveBody(const char *&First, const char *const End); + + DirectiveWithTokens &pushDirective(DirectiveKind Kind) { + Tokens.append(CurDirToks); + DirsWithToks.emplace_back(Kind, CurDirToks.size()); + CurDirToks.clear(); + return DirsWithToks.back(); + } + void popDirective() { + Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); + } + DirectiveKind topDirective() const { + return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; + } + + unsigned getOffsetAt(const char *CurPtr) const { + return CurPtr - Input.data(); + } + + /// Reports a diagnostic if the diagnostic engine is provided. Always returns + /// true at the end. + bool reportError(const char *CurPtr, unsigned Err); + + StringMap SplitIds; + StringRef Input; + SmallVectorImpl &Tokens; + DiagnosticsEngine *Diags; + SourceLocation InputSourceLoc; + + SmallVector CurDirToks; + SmallVector DirsWithToks; + LangOptions LangOpts; + Lexer TheLexer; +}; + +} // end anonymous namespace + +bool Scanner::reportError(const char *CurPtr, unsigned Err) { + if (!Diags) + return true; + assert(CurPtr >= Input.data() && "invalid buffer ptr"); + Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); + return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { + while (First != End && isHorizontalWhitespace(*First)) + ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, + const char *Current) { + assert(First <= Current); + + // Check if we can even back up. + if (*Current != '"' || First == Current) + return false; + + // Check for an "R". + --Current; + if (*Current != 'R') + return false; + if (First == Current || !isAsciiIdentifierContinue(*--Current)) + return true; + + // Check for a prefix of "u", "U", or "L". + if (*Current == 'u' || *Current == 'U' || *Current == 'L') + return First == Current || !isAsciiIdentifierContinue(*--Current); + + // Check for a prefix of "u8". + if (*Current != '8' || First == Current || *Current-- != 'u') + return false; + return First == Current || !isAsciiIdentifierContinue(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { + assert(First[0] == '"'); + assert(First[-1] == 'R'); + + const char *Last = ++First; + while (Last != End && *Last != '(') + ++Last; + if (Last == End) { + First = Last; // Hit the end... just give up. + return; + } + + StringRef Terminator(First, Last - First); + for (;;) { + // Move First to just past the next ")". + First = Last; + while (First != End && *First != ')') + ++First; + if (First == End) + return; + ++First; + + // Look ahead for the terminator sequence. 
+ Last = First; + while (Last != End && size_t(Last - First) < Terminator.size() && + Terminator[Last - First] == *Last) + ++Last; + + // Check if we hit it (or the end of the file). + if (Last == End) { + First = Last; + return; + } + if (size_t(Last - First) < Terminator.size()) + continue; + if (*Last != '"') + continue; + First = Last + 1; + return; + } +} + +// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) +static unsigned isEOL(const char *First, const char *const End) { + if (First == End) + return 0; + if (End - First > 1 && isVerticalWhitespace(First[0]) && + isVerticalWhitespace(First[1]) && First[0] != First[1]) + return 2; + return !!isVerticalWhitespace(First[0]); +} + +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '"' || *First == '<'); + const char Terminator = *First == '<' ? '>' : *First; + for (++First; First != End && *First != Terminator; ++First) { + // String and character literals don't extend past the end of the line. + if (isVerticalWhitespace(*First)) + return; + if (*First != '\\') + continue; + // Skip past backslash to the next character. This ensures that the + // character right after it is skipped as well, which matters if it's + // the terminator. + if (++First == End) + return; + if (!isWhitespace(*First)) + continue; + // Whitespace after the backslash might indicate a line continuation. + const char *FirstAfterBackslashPastSpace = First; + skipOverSpaces(FirstAfterBackslashPastSpace, End); + if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { + // Advance the character pointer to the next line for the next + // iteration. + First = FirstAfterBackslashPastSpace + NLSize - 1; + } + } + if (First != End) + ++First; // Finish off the string. +} + +// Returns the length of the skipped newline +static unsigned skipNewline(const char *&First, const char *End) { + if (First == End) + return 0; + assert(isVerticalWhitespace(*First)); + unsigned Len = isEOL(First, End); + assert(Len && "expected newline"); + First += Len; + return Len; +} + +static bool wasLineContinuation(const char *First, unsigned EOLLen) { + return *(First - (int)EOLLen - 1) == '\\'; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { + for (;;) { + if (First == End) + return; + + unsigned Len = isEOL(First, End); + if (Len) + return; + + do { + if (++First == End) + return; + Len = isEOL(First, End); + } while (!Len); + + if (First[-1] != '\\') + return; + + First += Len; + // Keep skipping lines... + } +} + +static void skipLineComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '/'); + First += 2; + skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '*'); + if (End - First < 4) { + First = End; + return; + } + for (First += 3; First != End; ++First) + if (First[-1] == '*' && First[0] == '/') { + ++First; + return; + } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, + const char *const Cur, + const char *const End) { + assert(*Cur == '\'' && "expected quotation character"); + // skipLine called in places where we don't expect a valid number + // body before `start` on the same line, so always return false at the start. 
+ if (Start == Cur) + return false; + // The previous character must be a valid PP number character. + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') + return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') + return false; + if (!isPreprocessingNumberBody(Prev)) + return false; + // The next character should be a valid identifier body character. + return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + if (First == End) + return; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + return; + } + const char *Start = First; + while (First != End && !isVerticalWhitespace(*First)) { + // Iterate over strings correctly to avoid comments and newlines. + if (*First == '"' || + (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + if (isRawStringLiteral(Start, First)) + skipRawString(First, End); + else + skipString(First, End); + continue; + } + + // Iterate over comments correctly. + if (*First != '/' || End - First < 2) { + ++First; + continue; + } + + if (First[1] == '/') { + // "//...". + skipLineComment(First, End); + continue; + } + + if (First[1] != '*') { + ++First; + continue; + } + + // "/*...*/". + skipBlockComment(First, End); + } + if (First == End) + return; + + // Skip over the newline. + unsigned Len = skipNewline(First, End); + if (!wasLineContinuation(First, Len)) // Continue past line-continuations. + break; + } +} + +static void skipDirective(StringRef Name, const char *&First, + const char *const End) { + if (llvm::StringSwitch(Name) + .Case("warning", true) + .Case("error", true) + .Default(false)) + // Do not process quotes or comments. + skipToNewlineRaw(First, End); + else + skipLine(First, End); +} + +static void skipWhitespace(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + skipOverSpaces(First, End); + + if (End - First < 2) + return; + + if (First[0] == '\\' && isVerticalWhitespace(First[1])) { + skipNewline(++First, End); + continue; + } + + // Check for a non-comment character. + if (First[0] != '/') + return; + + // "// ...". + if (First[1] == '/') { + skipLineComment(First, End); + return; + } + + // Cannot be a comment. + if (First[1] != '*') + return; + + // "/*...*/". 
+ skipBlockComment(First, End); + } +} + +bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, + const char *const End) { + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; + for (;;) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eof)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_missing_semi_after_at_import); + if (Tok.is(tok::semi)) + break; + } + pushDirective(Kind); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + return false; +} + +dependency_directives_scan::Token &Scanner::lexToken(const char *&First, + const char *const End) { + clang::Token Tok; + TheLexer.LexFromRawLexer(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); + + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); +} + +dependency_directives_scan::Token & +Scanner::lexIncludeFilename(const char *&First, const char *const End) { + clang::Token Tok; + TheLexer.LexIncludeFilename(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); + + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); +} + +void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { + while (true) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eod)) + break; + } +} + +LLVM_NODISCARD Optional +Scanner::tryLexIdentifier(const char *&First, const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.isNot(tok::raw_identifier)) + return None; + + bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; + if (LLVM_LIKELY(!NeedsCleaning)) + return Input.slice(Tok.Offset, Tok.getEnd()); + + SmallString<64> Spelling; + Spelling.resize(Tok.Length); + + unsigned SpellingLength = 0; + const char *BufPtr = Input.begin() + Tok.Offset; + const char *AfterIdent = Input.begin() + Tok.getEnd(); + while (BufPtr < AfterIdent) { + unsigned Size; + Spelling[SpellingLength++] = + Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; + } + + return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) + .first->first(); +} + +StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { + Optional Id = tryLexIdentifier(First, End); + assert(Id.hasValue() && "expected identifier token"); + return Id.getValue(); +} + +bool Scanner::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { + if (Optional FoundId = tryLexIdentifier(First, End)) { + return *FoundId == Id; + } + return false; +} + +bool Scanner::lexAt(const char *&First, const char *const End) { + // Handle "@import". + + // Lex '@'. 
+ const dependency_directives_scan::Token &AtTok = lexToken(First, End); + assert(AtTok.is(tok::at)); + (void)AtTok; + + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + return lexModuleDirectiveBody(decl_at_import, First, End); +} + +bool Scanner::lexModule(const char *&First, const char *const End) { + StringRef Id = lexIdentifier(First, End); + bool Export = false; + if (Id == "export") { + Export = true; + Optional NextId = tryLexIdentifier(First, End); + if (!NextId) { + skipLine(First, End); + return false; + } + Id = *NextId; + } + + if (Id != "module" && Id != "import") { + skipLine(First, End); + return false; + } + + skipWhitespace(First, End); + + // Ignore this as a module directive if the next character can't be part of + // an import. + + switch (*First) { + case ':': + case '<': + case '"': + break; + default: + if (!isAsciiIdentifierContinue(*First)) { + skipLine(First, End); + return false; + } + } + + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); + + DirectiveKind Kind; + if (Id == "module") + Kind = Export ? cxx_export_module_decl : cxx_module_decl; + else + Kind = Export ? cxx_export_import_decl : cxx_import_decl; + + return lexModuleDirectiveBody(Kind, First, End); +} + +bool Scanner::lexPragma(const char *&First, const char *const End) { + Optional FoundId = tryLexIdentifier(First, End); + if (!FoundId) + return false; + + StringRef Id = FoundId.getValue(); + auto Kind = llvm::StringSwitch(Id) + .Case("once", pp_pragma_once) + .Case("push_macro", pp_pragma_push_macro) + .Case("pop_macro", pp_pragma_pop_macro) + .Case("include_alias", pp_pragma_include_alias) + .Default(pp_none); + if (Kind != pp_none) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); + return false; + } + + if (Id != "clang") { + skipLine(First, End); + return false; + } + + // #pragma clang. + if (!isNextIdentifier("module", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module. + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module import. + lexPPDirectiveBody(First, End); + pushDirective(pp_pragma_import); + return false; +} + +bool Scanner::lexEndif(const char *&First, const char *const End) { + // Strip out "#else" if it's empty. + if (topDirective() == pp_else) + popDirective(); + + // If "#ifdef" is empty, strip it and skip the "#endif". + // + // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, + // we can skip empty `#if` and `#elif` blocks as well after scanning for a + // literal __has_include in the condition. Even without that rule we could + // drop the tokens if we scan for identifiers in the condition and find none. 
+ if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { + popDirective(); + skipLine(First, End); + return false; + } + + return lexDefault(pp_endif, First, End); +} + +bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, + const char *const End) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); + return false; +} + +static bool isStartOfRelevantLine(char First) { + switch (First) { + case '#': + case '@': + case 'i': + case 'e': + case 'm': + return true; + } + return false; +} + +bool Scanner::lexPPLine(const char *&First, const char *const End) { + assert(First != End); + + skipWhitespace(First, End); + assert(First <= End); + if (First == End) + return false; + + if (!isStartOfRelevantLine(*First)) { + skipLine(First, End); + assert(First <= End); + return false; + } + + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); + + auto ScEx1 = make_scope_exit([&]() { + /// Clear Scanner's CurDirToks before returning, in case we didn't push a + /// new directive. + CurDirToks.clear(); + }); + + // Handle "@import". + if (*First == '@') + return lexAt(First, End); + + if (*First == 'i' || *First == 'e' || *First == 'm') + return lexModule(First, End); + + // Handle preprocessing directives. + + TheLexer.setParsingPreprocessorDirective(true); + auto ScEx2 = make_scope_exit( + [&]() { TheLexer.setParsingPreprocessorDirective(false); }); + + // Lex '#'. + const dependency_directives_scan::Token &HashTok = lexToken(First, End); + assert(HashTok.is(tok::hash)); + (void)HashTok; + + Optional FoundId = tryLexIdentifier(First, End); + + if (!FoundId) { + skipLine(First, End); + return false; + } + + StringRef Id = FoundId.getValue(); + + if (Id == "pragma") + return lexPragma(First, End); + + auto Kind = llvm::StringSwitch(Id) + .Case("include", pp_include) + .Case("__include_macros", pp___include_macros) + .Case("define", pp_define) + .Case("undef", pp_undef) + .Case("import", pp_import) + .Case("include_next", pp_include_next) + .Case("if", pp_if) + .Case("ifdef", pp_ifdef) + .Case("ifndef", pp_ifndef) + .Case("elif", pp_elif) + .Case("elifdef", pp_elifdef) + .Case("elifndef", pp_elifndef) + .Case("else", pp_else) + .Case("endif", pp_endif) + .Default(pp_none); + if (Kind == pp_none) { + skipDirective(Id, First, End); + return false; + } + + if (Kind == pp_endif) + return lexEndif(First, End); + + switch (Kind) { + case pp_include: + case pp___include_macros: + case pp_include_next: + case pp_import: + lexIncludeFilename(First, End); + break; + default: + break; + } + + // Everything else. + return lexDefault(Kind, First, End); +} + +static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { + if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && + First[2] == '\xbf') + First += 3; +} + +bool Scanner::scanImpl(const char *First, const char *const End) { + skipUTF8ByteOrderMark(First, End); + while (First != End) + if (lexPPLine(First, End)) + return true; + return false; +} + +bool Scanner::scan(SmallVectorImpl &Directives) { + bool Error = scanImpl(Input.begin(), Input.end()); + + if (!Error) { + // Add an EOF on success. 
+ pushDirective(pp_eof); + } + + ArrayRef RemainingTokens = Tokens; + for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { + assert(RemainingTokens.size() >= DirWithToks.NumTokens); + Directives.emplace_back(DirWithToks.Kind, + RemainingTokens.take_front(DirWithToks.NumTokens)); + RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); + } + assert(RemainingTokens.empty()); + + return Error; +} + +bool clang::scanSourceForDependencyDirectives( + StringRef Input, SmallVectorImpl &Tokens, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); +} + +void clang::printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef Directives, + llvm::raw_ostream &OS) { + // Add a space separator where it is convenient for testing purposes. + auto needsSpaceSeparator = + [](tok::TokenKind Prev, + const dependency_directives_scan::Token &Tok) -> bool { + if (Prev == Tok.Kind) + return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::r_square); + if (Prev == tok::raw_identifier && + Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, + tok::char_constant, tok::header_name)) + return true; + if (Prev == tok::r_paren && Tok.isOneOf(tok::hash, tok::string_literal, + tok::char_constant, tok::unknown)) + return true; + return false; + }; + + for (const dependency_directives_scan::Directive &Directive : Directives) { + Optional PrevTokenKind; + for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { + if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) + OS << ' '; + PrevTokenKind = Tok.Kind; + OS << Source.slice(Tok.Offset, Tok.getEnd()); + } + } +} diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp deleted file mode 100644 --- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ /dev/null @@ -1,991 +0,0 @@ -//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This is the implementation for minimizing header and source files to the -/// minimum necessary preprocessor directives for evaluating includes. It -/// reduces the source down to #define, #include, #import, @import, and any -/// conditional preprocessor logic that contains one of those. -/// -//===----------------------------------------------------------------------===// - -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" -#include "clang/Basic/CharInfo.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Lex/LexDiagnostic.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; -using namespace clang; -using namespace clang::minimize_source_to_dependency_directives; - -namespace { - -struct Minimizer { - /// Minimized output. - SmallVectorImpl &Out; - /// The known tokens encountered during the minimization. 
- SmallVectorImpl &Tokens; - - Minimizer(SmallVectorImpl &Out, SmallVectorImpl &Tokens, - StringRef Input, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) - : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), - InputSourceLoc(InputSourceLoc) {} - - /// Lex the provided source and emit the minimized output. - /// - /// \returns True on error. - bool minimize(); - -private: - struct IdInfo { - const char *Last; - StringRef Name; - }; - - /// Lex an identifier. - /// - /// \pre First points at a valid identifier head. - LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); - LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, - const char *const End); - LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); - LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); - LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); - LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); - LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); - LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, - const char *&First, const char *const End); - Token &makeToken(TokenKind K) { - Tokens.emplace_back(K, Out.size()); - return Tokens.back(); - } - void popToken() { - Out.resize(Tokens.back().Offset); - Tokens.pop_back(); - } - TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } - - Minimizer &put(char Byte) { - Out.push_back(Byte); - return *this; - } - Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } - Minimizer &append(const char *First, const char *Last) { - Out.append(First, Last); - return *this; - } - - void printToNewline(const char *&First, const char *const End); - void printAdjacentModuleNameParts(const char *&First, const char *const End); - LLVM_NODISCARD bool printAtImportBody(const char *&First, - const char *const End); - void printDirectiveBody(const char *&First, const char *const End); - void printAdjacentMacroArgs(const char *&First, const char *const End); - LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); - - /// Reports a diagnostic if the diagnostic engine is provided. Always returns - /// true at the end. - bool reportError(const char *CurPtr, unsigned Err); - - StringMap SplitIds; - StringRef Input; - DiagnosticsEngine *Diags; - SourceLocation InputSourceLoc; -}; - -} // end anonymous namespace - -bool Minimizer::reportError(const char *CurPtr, unsigned Err) { - if (!Diags) - return true; - assert(CurPtr >= Input.data() && "invalid buffer ptr"); - Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); - return true; -} - -static void skipOverSpaces(const char *&First, const char *const End) { - while (First != End && isHorizontalWhitespace(*First)) - ++First; -} - -LLVM_NODISCARD static bool isRawStringLiteral(const char *First, - const char *Current) { - assert(First <= Current); - - // Check if we can even back up. - if (*Current != '"' || First == Current) - return false; - - // Check for an "R". - --Current; - if (*Current != 'R') - return false; - if (First == Current || !isAsciiIdentifierContinue(*--Current)) - return true; - - // Check for a prefix of "u", "U", or "L". 
- if (*Current == 'u' || *Current == 'U' || *Current == 'L') - return First == Current || !isAsciiIdentifierContinue(*--Current); - - // Check for a prefix of "u8". - if (*Current != '8' || First == Current || *Current-- != 'u') - return false; - return First == Current || !isAsciiIdentifierContinue(*--Current); -} - -static void skipRawString(const char *&First, const char *const End) { - assert(First[0] == '"'); - assert(First[-1] == 'R'); - - const char *Last = ++First; - while (Last != End && *Last != '(') - ++Last; - if (Last == End) { - First = Last; // Hit the end... just give up. - return; - } - - StringRef Terminator(First, Last - First); - for (;;) { - // Move First to just past the next ")". - First = Last; - while (First != End && *First != ')') - ++First; - if (First == End) - return; - ++First; - - // Look ahead for the terminator sequence. - Last = First; - while (Last != End && size_t(Last - First) < Terminator.size() && - Terminator[Last - First] == *Last) - ++Last; - - // Check if we hit it (or the end of the file). - if (Last == End) { - First = Last; - return; - } - if (size_t(Last - First) < Terminator.size()) - continue; - if (*Last != '"') - continue; - First = Last + 1; - return; - } -} - -// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) -static unsigned isEOL(const char *First, const char *const End) { - if (First == End) - return 0; - if (End - First > 1 && isVerticalWhitespace(First[0]) && - isVerticalWhitespace(First[1]) && First[0] != First[1]) - return 2; - return !!isVerticalWhitespace(First[0]); -} - -static void skipString(const char *&First, const char *const End) { - assert(*First == '\'' || *First == '"' || *First == '<'); - const char Terminator = *First == '<' ? '>' : *First; - for (++First; First != End && *First != Terminator; ++First) { - // String and character literals don't extend past the end of the line. - if (isVerticalWhitespace(*First)) - return; - if (*First != '\\') - continue; - // Skip past backslash to the next character. This ensures that the - // character right after it is skipped as well, which matters if it's - // the terminator. - if (++First == End) - return; - if (!isWhitespace(*First)) - continue; - // Whitespace after the backslash might indicate a line continuation. - const char *FirstAfterBackslashPastSpace = First; - skipOverSpaces(FirstAfterBackslashPastSpace, End); - if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { - // Advance the character pointer to the next line for the next - // iteration. - First = FirstAfterBackslashPastSpace + NLSize - 1; - } - } - if (First != End) - ++First; // Finish off the string. -} - -// Returns the length of the skipped newline -static unsigned skipNewline(const char *&First, const char *End) { - if (First == End) - return 0; - assert(isVerticalWhitespace(*First)); - unsigned Len = isEOL(First, End); - assert(Len && "expected newline"); - First += Len; - return Len; -} - -static bool wasLineContinuation(const char *First, unsigned EOLLen) { - return *(First - (int)EOLLen - 1) == '\\'; -} - -static void skipToNewlineRaw(const char *&First, const char *const End) { - for (;;) { - if (First == End) - return; - - unsigned Len = isEOL(First, End); - if (Len) - return; - - do { - if (++First == End) - return; - Len = isEOL(First, End); - } while (!Len); - - if (First[-1] != '\\') - return; - - First += Len; - // Keep skipping lines... 
- } -} - -static const char *findLastNonSpace(const char *First, const char *Last) { - assert(First <= Last); - while (First != Last && isHorizontalWhitespace(Last[-1])) - --Last; - return Last; -} - -static const char *findLastNonSpaceNonBackslash(const char *First, - const char *Last) { - assert(First <= Last); - while (First != Last && - (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\')) - --Last; - return Last; -} - -static const char *findFirstTrailingSpace(const char *First, - const char *Last) { - const char *LastNonSpace = findLastNonSpace(First, Last); - if (Last == LastNonSpace) - return Last; - assert(isHorizontalWhitespace(LastNonSpace[0])); - return LastNonSpace + 1; -} - -static void skipLineComment(const char *&First, const char *const End) { - assert(First[0] == '/' && First[1] == '/'); - First += 2; - skipToNewlineRaw(First, End); -} - -static void skipBlockComment(const char *&First, const char *const End) { - assert(First[0] == '/' && First[1] == '*'); - if (End - First < 4) { - First = End; - return; - } - for (First += 3; First != End; ++First) - if (First[-1] == '*' && First[0] == '/') { - ++First; - return; - } -} - -/// \returns True if the current single quotation mark character is a C++ 14 -/// digit separator. -static bool isQuoteCppDigitSeparator(const char *const Start, - const char *const Cur, - const char *const End) { - assert(*Cur == '\'' && "expected quotation character"); - // skipLine called in places where we don't expect a valid number - // body before `start` on the same line, so always return false at the start. - if (Start == Cur) - return false; - // The previous character must be a valid PP number character. - // Make sure that the L, u, U, u8 prefixes don't get marked as a - // separator though. - char Prev = *(Cur - 1); - if (Prev == 'L' || Prev == 'U' || Prev == 'u') - return false; - if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') - return false; - if (!isPreprocessingNumberBody(Prev)) - return false; - // The next character should be a valid identifier body character. - return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); -} - -static void skipLine(const char *&First, const char *const End) { - for (;;) { - assert(First <= End); - if (First == End) - return; - - if (isVerticalWhitespace(*First)) { - skipNewline(First, End); - return; - } - const char *Start = First; - while (First != End && !isVerticalWhitespace(*First)) { - // Iterate over strings correctly to avoid comments and newlines. - if (*First == '"' || - (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { - if (isRawStringLiteral(Start, First)) - skipRawString(First, End); - else - skipString(First, End); - continue; - } - - // Iterate over comments correctly. - if (*First != '/' || End - First < 2) { - ++First; - continue; - } - - if (First[1] == '/') { - // "//...". - skipLineComment(First, End); - continue; - } - - if (First[1] != '*') { - ++First; - continue; - } - - // "/*...*/". - skipBlockComment(First, End); - } - if (First == End) - return; - - // Skip over the newline. - unsigned Len = skipNewline(First, End); - if (!wasLineContinuation(First, Len)) // Continue past line-continuations. - break; - } -} - -static void skipDirective(StringRef Name, const char *&First, - const char *const End) { - if (llvm::StringSwitch(Name) - .Case("warning", true) - .Case("error", true) - .Default(false)) - // Do not process quotes or comments. 
- skipToNewlineRaw(First, End); - else - skipLine(First, End); -} - -void Minimizer::printToNewline(const char *&First, const char *const End) { - while (First != End && !isVerticalWhitespace(*First)) { - const char *Last = First; - do { - // Iterate over strings correctly to avoid comments and newlines. - if (*Last == '"' || *Last == '\'' || - (*Last == '<' && (top() == pp_include || top() == pp_import))) { - if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) - skipRawString(Last, End); - else - skipString(Last, End); - continue; - } - if (*Last != '/' || End - Last < 2) { - ++Last; - continue; // Gather the rest up to print verbatim. - } - - if (Last[1] != '/' && Last[1] != '*') { - ++Last; - continue; - } - - // Deal with "//..." and "/*...*/". - append(First, findFirstTrailingSpace(First, Last)); - First = Last; - - if (Last[1] == '/') { - skipLineComment(First, End); - return; - } - - put(' '); - skipBlockComment(First, End); - skipOverSpaces(First, End); - Last = First; - } while (Last != End && !isVerticalWhitespace(*Last)); - - // Print out the string. - const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); - if (Last == End || LastBeforeTrailingSpace == First || - LastBeforeTrailingSpace[-1] != '\\') { - append(First, LastBeforeTrailingSpace); - First = Last; - skipNewline(First, End); - return; - } - - // Print up to the last character that's not a whitespace or backslash. - // Then print exactly one space, which matters when tokens are separated by - // a line continuation. - append(First, findLastNonSpaceNonBackslash(First, Last)); - put(' '); - - First = Last; - skipNewline(First, End); - skipOverSpaces(First, End); - } -} - -static void skipWhitespace(const char *&First, const char *const End) { - for (;;) { - assert(First <= End); - skipOverSpaces(First, End); - - if (End - First < 2) - return; - - if (First[0] == '\\' && isVerticalWhitespace(First[1])) { - skipNewline(++First, End); - continue; - } - - // Check for a non-comment character. - if (First[0] != '/') - return; - - // "// ...". - if (First[1] == '/') { - skipLineComment(First, End); - return; - } - - // Cannot be a comment. - if (First[1] != '*') - return; - - // "/*...*/". - skipBlockComment(First, End); - } -} - -void Minimizer::printAdjacentModuleNameParts(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.')); - append(First, Last); - First = Last; -} - -bool Minimizer::printAtImportBody(const char *&First, const char *const End) { - for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; - - if (isVerticalWhitespace(*First)) { - skipNewline(First, End); - continue; - } - - // Found a semicolon. - if (*First == ';') { - put(*First++).put('\n'); - return false; - } - - // Don't handle macro expansions inside @import for now. - if (!isAsciiIdentifierContinue(*First) && *First != '.') - return true; - - printAdjacentModuleNameParts(First, End); - } -} - -void Minimizer::printDirectiveBody(const char *&First, const char *const End) { - skipWhitespace(First, End); // Skip initial whitespace. 
- printToNewline(First, End); - while (Out.back() == ' ') - Out.pop_back(); - put('\n'); -} - -LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, - const char *const End) { - assert(isAsciiIdentifierContinue(*First) && "invalid identifer"); - const char *Last = First + 1; - while (Last != End && isAsciiIdentifierContinue(*Last)) - ++Last; - return Last; -} - -LLVM_NODISCARD static const char * -getIdentifierContinuation(const char *First, const char *const End) { - if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) - return nullptr; - - ++First; - skipNewline(First, End); - if (First == End) - return nullptr; - return isAsciiIdentifierContinue(First[0]) ? First : nullptr; -} - -Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, - const char *const End) { - const char *Last = lexRawIdentifier(First, End); - const char *Next = getIdentifierContinuation(Last, End); - if (LLVM_LIKELY(!Next)) - return IdInfo{Last, StringRef(First, Last - First)}; - - // Slow path, where identifiers are split over lines. - SmallVector Id(First, Last); - while (Next) { - Last = lexRawIdentifier(Next, End); - Id.append(Next, Last); - Next = getIdentifierContinuation(Last, End); - } - return IdInfo{ - Last, - SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; -} - -void Minimizer::printAdjacentMacroArgs(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && - (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ',')); - append(First, Last); - First = Last; -} - -bool Minimizer::printMacroArgs(const char *&First, const char *const End) { - assert(*First == '('); - put(*First++); - for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; - - if (*First == ')') { - put(*First++); - return false; - } - - // This is intentionally fairly liberal. - if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ',')) - return true; - - printAdjacentMacroArgs(First, End); - } -} - -/// Looks for an identifier starting from Last. -/// -/// Updates "First" to just past the next identifier, if any. Returns true iff -/// the identifier matches "Id". -bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, - const char *const End) { - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) - return false; - - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - return FoundId.Name == Id; -} - -bool Minimizer::lexAt(const char *&First, const char *const End) { - // Handle "@import". 
- const char *ImportLoc = First++; - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); - return false; - } - makeToken(decl_at_import); - append("@import "); - if (printAtImportBody(First, End)) - return reportError( - ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); - skipWhitespace(First, End); - if (First == End) - return false; - if (!isVerticalWhitespace(*First)) - return reportError( - ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); - skipNewline(First, End); - return false; -} - -bool Minimizer::lexModule(const char *&First, const char *const End) { - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; - bool Export = false; - if (Id.Name == "export") { - Export = true; - skipWhitespace(First, End); - if (!isAsciiIdentifierContinue(*First)) { - skipLine(First, End); - return false; - } - Id = lexIdentifier(First, End); - First = Id.Last; - } - - if (Id.Name != "module" && Id.Name != "import") { - skipLine(First, End); - return false; - } - - skipWhitespace(First, End); - - // Ignore this as a module directive if the next character can't be part of - // an import. - - switch (*First) { - case ':': - case '<': - case '"': - break; - default: - if (!isAsciiIdentifierContinue(*First)) { - skipLine(First, End); - return false; - } - } - - if (Export) { - makeToken(cxx_export_decl); - append("export "); - } - - if (Id.Name == "module") - makeToken(cxx_module_decl); - else - makeToken(cxx_import_decl); - append(Id.Name); - append(" "); - printToNewline(First, End); - append("\n"); - return false; -} - -bool Minimizer::lexDefine(const char *&First, const char *const End) { - makeToken(pp_define); - append("#define "); - skipWhitespace(First, End); - - if (!isAsciiIdentifierStart(*First)) - return reportError(First, diag::err_pp_macro_not_identifier); - - IdInfo Id = lexIdentifier(First, End); - const char *Last = Id.Last; - append(Id.Name); - if (Last == End) - return false; - if (*Last == '(') { - size_t Size = Out.size(); - if (printMacroArgs(Last, End)) { - // Be robust to bad macro arguments, since they can show up in disabled - // code. - Out.resize(Size); - append("(/* invalid */\n"); - skipLine(Last, End); - return false; - } - } - skipWhitespace(Last, End); - if (Last == End) - return false; - if (!isVerticalWhitespace(*Last)) - put(' '); - printDirectiveBody(Last, End); - First = Last; - return false; -} - -bool Minimizer::lexPragma(const char *&First, const char *const End) { - // #pragma. - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) - return false; - - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - if (FoundId.Name == "once") { - // #pragma once - skipLine(First, End); - makeToken(pp_pragma_once); - append("#pragma once\n"); - return false; - } - if (FoundId.Name == "push_macro") { - // #pragma push_macro - makeToken(pp_pragma_push_macro); - append("#pragma push_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "pop_macro") { - // #pragma pop_macro - makeToken(pp_pragma_pop_macro); - append("#pragma pop_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "include_alias") { - // #pragma include_alias - makeToken(pp_pragma_include_alias); - append("#pragma include_alias"); - printDirectiveBody(First, End); - return false; - } - - if (FoundId.Name != "clang") { - skipLine(First, End); - return false; - } - - // #pragma clang. 
- if (!isNextIdentifier("module", First, End)) { - skipLine(First, End); - return false; - } - - // #pragma clang module. - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); - return false; - } - - // #pragma clang module import. - makeToken(pp_pragma_import); - append("#pragma clang module import "); - printDirectiveBody(First, End); - return false; -} - -bool Minimizer::lexEndif(const char *&First, const char *const End) { - // Strip out "#else" if it's empty. - if (top() == pp_else) - popToken(); - - // If "#ifdef" is empty, strip it and skip the "#endif". - // - // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, - // we can skip empty `#if` and `#elif` blocks as well after scanning for a - // literal __has_include in the condition. Even without that rule we could - // drop the tokens if we scan for identifiers in the condition and find none. - if (top() == pp_ifdef || top() == pp_ifndef) { - popToken(); - skipLine(First, End); - return false; - } - - return lexDefault(pp_endif, "endif", First, End); -} - -bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, - const char *&First, const char *const End) { - makeToken(Kind); - put('#').append(Directive).put(' '); - printDirectiveBody(First, End); - return false; -} - -static bool isStartOfRelevantLine(char First) { - switch (First) { - case '#': - case '@': - case 'i': - case 'e': - case 'm': - return true; - } - return false; -} - -bool Minimizer::lexPPLine(const char *&First, const char *const End) { - assert(First != End); - - skipWhitespace(First, End); - assert(First <= End); - if (First == End) - return false; - - if (!isStartOfRelevantLine(*First)) { - skipLine(First, End); - assert(First <= End); - return false; - } - - // Handle "@import". - if (*First == '@') - return lexAt(First, End); - - if (*First == 'i' || *First == 'e' || *First == 'm') - return lexModule(First, End); - - // Handle preprocessing directives. - ++First; // Skip over '#'. - skipWhitespace(First, End); - - if (First == End) - return reportError(First, diag::err_pp_expected_eol); - - if (!isAsciiIdentifierStart(*First)) { - skipLine(First, End); - return false; - } - - // Figure out the token. - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; - - if (Id.Name == "pragma") - return lexPragma(First, End); - - auto Kind = llvm::StringSwitch(Id.Name) - .Case("include", pp_include) - .Case("__include_macros", pp___include_macros) - .Case("define", pp_define) - .Case("undef", pp_undef) - .Case("import", pp_import) - .Case("include_next", pp_include_next) - .Case("if", pp_if) - .Case("ifdef", pp_ifdef) - .Case("ifndef", pp_ifndef) - .Case("elif", pp_elif) - .Case("elifdef", pp_elifdef) - .Case("elifndef", pp_elifndef) - .Case("else", pp_else) - .Case("endif", pp_endif) - .Default(pp_none); - if (Kind == pp_none) { - skipDirective(Id.Name, First, End); - return false; - } - - if (Kind == pp_endif) - return lexEndif(First, End); - - if (Kind == pp_define) - return lexDefine(First, End); - - // Everything else. 
- return lexDefault(Kind, Id.Name, First, End); -} - -static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { - if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && - First[2] == '\xbf') - First += 3; -} - -bool Minimizer::minimizeImpl(const char *First, const char *const End) { - skipUTF8ByteOrderMark(First, End); - while (First != End) - if (lexPPLine(First, End)) - return true; - return false; -} - -bool Minimizer::minimize() { - bool Error = minimizeImpl(Input.begin(), Input.end()); - - if (!Error) { - // Add a trailing newline and an EOF on success. - if (!Out.empty() && Out.back() != '\n') - Out.push_back('\n'); - makeToken(pp_eof); - } - - // Null-terminate the output. This way the memory buffer that's passed to - // Clang will not have to worry about the terminating '\0'. - Out.push_back(0); - Out.pop_back(); - return Error; -} - -bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( - ArrayRef Input, llvm::SmallVectorImpl &Range) { - struct Directive { - enum DirectiveKind { - If, // if/ifdef/ifndef - Else // elif/elifdef/elifndef, else - }; - int Offset; - DirectiveKind Kind; - }; - llvm::SmallVector Offsets; - for (const Token &T : Input) { - switch (T.K) { - case pp_if: - case pp_ifdef: - case pp_ifndef: - Offsets.push_back({T.Offset, Directive::If}); - break; - - case pp_elif: - case pp_elifdef: - case pp_elifndef: - case pp_else: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - Offsets.push_back({T.Offset, Directive::Else}); - break; - } - - case pp_endif: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - do { - Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; - if (Kind == Directive::If) - break; - } while (!Offsets.empty()); - break; - } - default: - break; - } - } - return false; -} - -bool clang::minimizeSourceToDependencyDirectives( - StringRef Input, SmallVectorImpl &Output, - SmallVectorImpl &Tokens, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) { - Output.clear(); - Tokens.clear(); - return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); -} diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -226,13 +226,11 @@ return L; } -bool Lexer::skipOver(unsigned NumBytes) { - IsAtPhysicalStartOfLine = true; - IsAtStartOfLine = true; - if ((BufferPtr + NumBytes) > BufferEnd) - return true; - BufferPtr += NumBytes; - return false; +void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) { + this->IsAtPhysicalStartOfLine = IsAtStartOfLine; + this->IsAtStartOfLine = IsAtStartOfLine; + assert((BufferStart + Offset) <= BufferEnd); + BufferPtr = BufferStart + Offset; } template static void StringifyImpl(T &Str, char Quote) { @@ -2939,6 +2937,13 @@ unsigned Lexer::isNextPPTokenLParen() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + if (isDependencyDirectivesLexer()) { + if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) + return 2; + return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::l_paren); + } + // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. 
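For context on the dependency-directives fast path added to isNextPPTokenLParen() above, here is a small sketch of the same classification under the function's long-standing convention (an assumption drawn from the existing Lexer documentation, not something this patch changes; the helper name is illustrative):

#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/ArrayRef.h"

// Classify the next token of the current directive:
//   2 -> no more tokens remain (the caller treats this as "unknown"/EOF),
//   1 -> the next token is '(',
//   0 -> the next token is something else.
static unsigned classifyNextDepDirectiveToken(
    llvm::ArrayRef<clang::dependency_directives_scan::Token> Toks,
    unsigned NextIndex) {
  if (NextIndex == Toks.size())
    return 2;
  return Toks[NextIndex].is(clang::tok::l_paren) ? 1u : 0u;
}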
@@ -3281,6 +3286,8 @@ } bool Lexer::Lex(Token &Result) { + assert(!isDependencyDirectivesLexer()); + // Start a new token. Result.startToken(); @@ -4102,3 +4109,129 @@ // We parsed the directive; lex a token with the new state. return false; } + +const char *Lexer::convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result) { + const char *TokPtr = BufferStart + DDTok.Offset; + Result.startToken(); + Result.setLocation(getSourceLocation(TokPtr)); + Result.setKind(DDTok.Kind); + Result.setFlag((Token::TokenFlags)DDTok.Flags); + Result.setLength(DDTok.Length); + BufferPtr = TokPtr + DDTok.Length; + return TokPtr; +} + +bool Lexer::LexDependencyDirectiveToken(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { + if (DepDirectives.front().Kind == pp_eof) + return LexEndOfFile(Result, BufferEnd); + NextDepDirectiveTokenIndex = 0; + DepDirectives = DepDirectives.drop_front(); + } + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++]; + + const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); + + if (Result.is(tok::hash) && Result.isAtStartOfLine()) { + PP->HandleDirective(Result); + return false; + } + if (Result.is(tok::raw_identifier)) { + Result.setRawIdentifierData(TokPtr); + if (!isLexingRawMode()) { + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (II->isHandleIdentifierCase()) + return PP->HandleIdentifier(Result); + } + return true; + } + if (Result.isLiteral()) { + Result.setLiteralData(TokPtr); + return true; + } + if (Result.is(tok::colon) && + (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) { + // Convert consecutive colons to 'tok::coloncolon'. 
+ if (*BufferPtr == ':') { + assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::colon)); + ++NextDepDirectiveTokenIndex; + Result.setKind(tok::coloncolon); + } + return true; + } + if (Result.is(tok::eod)) + ParsingPreprocessorDirective = false; + + return true; +} + +bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + bool Stop = false; + unsigned NestedIfs = 0; + do { + DepDirectives = DepDirectives.drop_front(); + switch (DepDirectives.front().Kind) { + case pp_none: + llvm_unreachable("unexpected 'pp_none'"); + case pp_include: + case pp___include_macros: + case pp_define: + case pp_undef: + case pp_import: + case pp_pragma_import: + case pp_pragma_once: + case pp_pragma_push_macro: + case pp_pragma_pop_macro: + case pp_pragma_include_alias: + case pp_include_next: + case decl_at_import: + case cxx_module_decl: + case cxx_import_decl: + case cxx_export_module_decl: + case cxx_export_import_decl: + break; + case pp_if: + case pp_ifdef: + case pp_ifndef: + ++NestedIfs; + break; + case pp_elif: + case pp_elifdef: + case pp_elifndef: + case pp_else: + if (!NestedIfs) { + Stop = true; + } + break; + case pp_endif: + if (!NestedIfs) { + Stop = true; + } else { + --NestedIfs; + } + break; + case pp_eof: + return LexEndOfFile(Result, BufferEnd); + } + } while (!Stop); + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens.front(); + assert(DDTok.is(tok::hash)); + NextDepDirectiveTokenIndex = 1; + + convertDependencyDirectiveToken(DDTok, Result); + return false; +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -398,41 +398,6 @@ return DiscardUntilEndOfDirective().getEnd(); } -Optional Preprocessor::getSkippedRangeForExcludedConditionalBlock( - SourceLocation HashLoc) { - if (!ExcludedConditionalDirectiveSkipMappings) - return None; - if (!HashLoc.isFileID()) - return None; - - std::pair HashFileOffset = - SourceMgr.getDecomposedLoc(HashLoc); - Optional Buf = - SourceMgr.getBufferOrNone(HashFileOffset.first); - if (!Buf) - return None; - auto It = - ExcludedConditionalDirectiveSkipMappings->find(Buf->getBufferStart()); - if (It == ExcludedConditionalDirectiveSkipMappings->end()) - return None; - - const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond(); - // Check if the offset of '#' is mapped in the skipped ranges. - auto MappingIt = SkippedRanges.find(HashFileOffset.second); - if (MappingIt == SkippedRanges.end()) - return None; - - unsigned BytesToSkip = MappingIt->getSecond(); - unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset(); - assert(CurLexerBufferOffset >= HashFileOffset.second && - "lexer is before the hash?"); - // Take into account the fact that the lexer has already advanced, so the - // number of bytes to skip must be adjusted. - unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; - assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); - return BytesToSkip - LengthDiff; -} - /// SkipExcludedConditionalBlock - We just read a \#if or related directive and /// decided that the subsequent tokens are in the \#if'd out portion of the /// file. Lex the rest of the file, until we see an \#endif. If @@ -459,36 +424,42 @@ // disabling warnings, etc. 
CurPPLexer->LexingRawMode = true; Token Tok; - if (auto SkipLength = - getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) { - // Skip to the next '#endif' / '#else' / '#elif'. - CurLexer->skipOver(*SkipLength); - } SourceLocation endLoc; while (true) { - CurLexer->Lex(Tok); + if (CurLexer->isDependencyDirectivesLexer()) { + CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok); + } else { + while (true) { + CurLexer->Lex(Tok); - if (Tok.is(tok::code_completion)) { - setCodeCompletionReached(); - if (CodeComplete) - CodeComplete->CodeCompleteInConditionalExclusion(); - continue; - } + if (Tok.is(tok::code_completion)) { + setCodeCompletionReached(); + if (CodeComplete) + CodeComplete->CodeCompleteInConditionalExclusion(); + continue; + } - // If this is the end of the buffer, we have an error. - if (Tok.is(tok::eof)) { - // We don't emit errors for unterminated conditionals here, - // Lexer::LexEndOfFile can do that properly. - // Just return and let the caller lex after this #include. - if (PreambleConditionalStack.isRecording()) - PreambleConditionalStack.SkipInfo.emplace( - HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc); - break; - } + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // We don't emit errors for unterminated conditionals here, + // Lexer::LexEndOfFile can do that properly. + // Just return and let the caller lex after this #include. + if (PreambleConditionalStack.isRecording()) + PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc, + FoundNonSkipPortion, + FoundElse, ElseLoc); + break; + } - // If this token is not a preprocessor directive, just skip it. - if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) - continue; + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + break; + } + } + if (Tok.is(tok::eof)) + break; // We just parsed a # character at the start of a line, so we're in // directive mode. Tell the lexer this so any newlines we see will be diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -91,8 +91,19 @@ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset); } - EnterSourceFileWithLexer( - new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir); + Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); + if (getPreprocessorOpts().DependencyDirectivesForFile && + FID != PredefinesFileID) { + if (Optional File = SourceMgr.getFileEntryRefForID(FID)) { + if (Optional> + DepDirectives = + getPreprocessorOpts().DependencyDirectivesForFile(*File)) { + TheLexer->DepDirectives = *DepDirectives; + } + } + } + + EnterSourceFileWithLexer(TheLexer, CurDir); return false; } @@ -110,7 +121,9 @@ CurDirLookup = CurDir; CurLexerSubmodule = nullptr; if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_Lexer; + CurLexerKind = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. 
if (Callbacks && !CurLexer->Is_PragmaLexer) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -158,11 +158,6 @@ if (this->PPOpts->GeneratePreamble) PreambleConditionalStack.startRecording(); - ExcludedConditionalDirectiveSkipMappings = - this->PPOpts->ExcludedConditionalDirectiveSkipMappings; - if (ExcludedConditionalDirectiveSkipMappings) - ExcludedConditionalDirectiveSkipMappings->clear(); - MaxTokens = LangOpts.MaxTokens; } @@ -382,7 +377,9 @@ void Preprocessor::recomputeCurLexerKind() { if (CurLexer) - CurLexerKind = CLK_Lexer; + CurLexerKind = CurLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) CurLexerKind = CLK_TokenLexer; else @@ -645,6 +642,9 @@ case CLK_CachingLexer: CachingLex(Tok); break; + case CLK_DependencyDirectivesLexer: + CurLexer->LexDependencyDirectiveToken(Tok); + break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Tok); break; @@ -906,6 +906,9 @@ CachingLex(Result); ReturnedToken = true; break; + case CLK_DependencyDirectivesLexer: + ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); + break; case CLK_LexAfterModuleImport: ReturnedToken = LexAfterModuleImport(Result); break; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -41,69 +40,44 @@ return TentativeEntry(Stat, std::move(Buffer)); } -EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( +EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldMinimize(Filename, Entry.getUniqueID())) - return EntryRef(/*Minimized=*/false, Filename, Entry); + !shouldScanForDirectives(Filename, Entry.getUniqueID())) + return EntryRef(Filename, Entry); - CachedFileContents *Contents = Entry.getContents(); + CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); std::lock_guard GuardLock(Contents->ValueLock); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); - - llvm::SmallString<1024> MinimizedFileContents; - // Minimize the file down to directives that might affect the dependencies. - SmallVector Tokens; - if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(), - MinimizedFileContents, Tokens)) { + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); + + SmallVector Directives; + // Scan the file for preprocessor directives that might affect the + // dependencies. 
+ if (scanSourceForDependencyDirectives(Contents->Contents->getBuffer(), + Contents->DepDirectiveTokens, + Directives)) { + Contents->DepDirectiveTokens.clear(); // FIXME: Propagate the diagnostic if desired by the client. - // Use the original file if the minimization failed. - Contents->MinimizedStorage = - llvm::MemoryBuffer::getMemBuffer(*Contents->Original); - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + Contents->DepDirectives.store(new Optional()); + return EntryRef(Filename, Entry); } - // The contents produced by the minimizer must be null terminated. - assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && - "not null terminated contents"); - - // Compute the skipped PP ranges that speedup skipping over inactive - // preprocessor blocks. - llvm::SmallVector - SkippedRanges; - minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, - SkippedRanges); - PreprocessorSkippedRangeMapping Mapping; - for (const auto &Range : SkippedRanges) { - if (Range.Length < 16) { - // Ignore small ranges as non-profitable. - // FIXME: This is a heuristic, its worth investigating the tradeoffs - // when it should be applied. - continue; - } - Mapping[Range.Offset] = Range.Length; - } - Contents->PPSkippedRangeMapping = std::move(Mapping); - - Contents->MinimizedStorage = std::make_unique( - std::move(MinimizedFileContents)); - // This function performed double-checked locking using `MinimizedAccess`. - // Assigning it must be the last thing this function does. If we were to - // assign it before `PPSkippedRangeMapping`, other threads may skip the - // critical section (`MinimizedAccess != nullptr`) and access the mappings - // that are about to be initialized, leading to a data race. - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + // This function performed double-checked locking using `DepDirectives`. + // Assigning it must be the last thing this function does, otherwise other + // threads may skip the + // critical section (`DepDirectives != nullptr`), leading to a data race. + Contents->DepDirectives.store( + new Optional(std::move(Directives))); + return EntryRef(Filename, Entry); } DependencyScanningFilesystemSharedCache:: @@ -189,7 +163,7 @@ /// /// This is kinda hacky, it would be better if we knew what kind of file Clang /// was expecting instead. -static bool shouldMinimizeBasedOnExtension(StringRef Filename) { +static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) return true; // C++ standard library @@ -207,22 +181,22 @@ if (Ext.empty()) return false; // This may be the module cache directory. // Only cache stat failures on source files. - return shouldMinimizeBasedOnExtension(Filename); + return shouldScanForDirectivesBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::disableMinimization( +void DependencyScanningWorkerFilesystem::disableDirectivesScanning( StringRef Filename) { // Since we're not done setting up `NotToBeMinimized` yet, we need to disable // minimization explicitly. 
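The rewritten scanForDirectivesIfNecessary() above keeps the double-checked locking discipline described in its comment; the sketch below shows that shape in isolation (illustrative names and payload type, not the patch's actual types):

#include <atomic>
#include <mutex>
#include <vector>

// The atomic pointer is published only after the expensive work is fully
// done, so a reader that observes it non-null never races on the payload.
struct CachedScan {
  std::mutex ValueLock;
  std::atomic<const std::vector<int> *> Published{nullptr};
};

static const std::vector<int> *getOrComputeScan(CachedScan &C) {
  if (const auto *P = C.Published.load()) // Fast path, no locking.
    return P;
  std::lock_guard<std::mutex> Guard(C.ValueLock);
  if (const auto *P = C.Published.load()) // Re-check under the lock.
    return P;
  auto *Result = new std::vector<int>{1, 2, 3}; // Expensive work goes here.
  C.Published.store(Result);                    // Publish last.
  return Result;
}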
- if (llvm::ErrorOr Result = - getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true)) - NotToBeMinimized.insert(Result->getStatus().getUniqueID()); + if (llvm::ErrorOr Result = getOrCreateFileSystemEntry( + Filename, /*DisableDirectivesScanning=*/true)) + NotToBeScanned.insert(Result->getStatus().getUniqueID()); } -bool DependencyScanningWorkerFilesystem::shouldMinimize( +bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( StringRef Filename, llvm::sys::fs::UniqueID UID) { - return shouldMinimizeBasedOnExtension(Filename) && - !NotToBeMinimized.contains(UID); + return shouldScanForDirectivesBasedOnExtension(Filename) && + !NotToBeScanned.contains(UID); } const CachedFileSystemEntry & @@ -275,14 +249,16 @@ llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - StringRef Filename, bool DisableMinimization) { + StringRef Filename, bool DisableDirectivesScanning) { if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) - return minimizeIfNecessary(*Entry, Filename, DisableMinimization) + return scanForDirectivesIfNecessary(*Entry, Filename, + DisableDirectivesScanning) .unwrapError(); auto MaybeEntry = computeAndStoreResult(Filename); if (!MaybeEntry) return MaybeEntry.getError(); - return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization) + return scanForDirectivesIfNecessary(*MaybeEntry, Filename, + DisableDirectivesScanning) .unwrapError(); } @@ -301,15 +277,13 @@ /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using /// this subclass. -class MinimizedVFSFile final : public llvm::vfs::File { +class DepScanFile final : public llvm::vfs::File { public: - MinimizedVFSFile(std::unique_ptr Buffer, - llvm::vfs::Status Stat) + DepScanFile(std::unique_ptr Buffer, + llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} - static llvm::ErrorOr> - create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings); + static llvm::ErrorOr> create(EntryRef Entry); llvm::ErrorOr status() override { return Stat; } @@ -328,23 +302,19 @@ } // end anonymous namespace -llvm::ErrorOr> MinimizedVFSFile::create( - EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { +llvm::ErrorOr> +DepScanFile::create(EntryRef Entry) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) return std::make_error_code(std::errc::is_a_directory); - auto Result = std::make_unique( + auto Result = std::make_unique( llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getStatus().getName(), /*RequiresNullTerminator=*/false), Entry.getStatus()); - const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); - if (EntrySkipMappings && !EntrySkipMappings->empty()) - PPSkipMappings[Result->Buffer->getBufferStart()] = EntrySkipMappings; - return llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } @@ -357,5 +327,5 @@ llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return MinimizedVFSFile::create(Result.get(), PPSkipMappings); + return DepScanFile::create(Result.get()); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -137,12 +137,11 @@ DependencyScanningAction( StringRef WorkingDirectory, DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, - 
ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings, ScanningOutputFormat Format, bool OptimizeArgs, llvm::Optional ModuleName = None) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), - DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), - OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {} + DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs), + ModuleName(ModuleName) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -183,29 +182,35 @@ // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - DepFS->enableMinimizationOfAllFiles(); + DepFS->enableDirectivesScanningOfAllFiles(); // Don't minimize any files that contributed to prebuilt modules. The // implicit build validates the modules by comparing the reported sizes of // their inputs to the current state of the filesystem. Minimization would // throw this mechanism off. for (const auto &File : PrebuiltModulesInputFiles) - DepFS->disableMinimization(File.getKey()); + DepFS->disableDirectivesScanning(File.getKey()); // Don't minimize any files that were explicitly passed in the build // settings and that might be opened. for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->disableMinimization(E.Path); + DepFS->disableDirectivesScanning(E.Path); for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->disableMinimization(F); + DepFS->disableDirectivesScanning(F); // Support for virtual file system overlays on top of the caching // filesystem. FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation( ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS)); - // Pass the skip mappings which should speed up excluded conditional block - // skipping in the preprocessor. 
- ScanInstance.getPreprocessorOpts() - .ExcludedConditionalDirectiveSkipMappings = &PPSkipMappings; + llvm::IntrusiveRefCntPtr localDepFS = + DepFS; + ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile = + [localDepFS = std::move(localDepFS)](FileEntryRef File) + -> Optional> { + if (llvm::ErrorOr Entry = + localDepFS->getOrCreateFileSystemEntry(File.getName())) + return Entry->getDirectiveTokens(); + return None; + }; } // Create the dependency collector that will collect the produced @@ -262,7 +267,6 @@ StringRef WorkingDirectory; DependencyConsumer &Consumer; llvm::IntrusiveRefCntPtr DepFS; - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; ScanningOutputFormat Format; bool OptimizeArgs; llvm::Optional ModuleName; @@ -287,9 +291,9 @@ OverlayFS->pushOverlay(InMemoryFS); RealFS = OverlayFS; - if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing) + if (Service.getMode() == ScanningMode::DependencyDirectivesScan) DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(), - RealFS, PPSkipMappings); + RealFS); if (Service.canReuseFileManager()) Files = new FileManager(FileSystemOptions(), RealFS); } @@ -340,8 +344,8 @@ return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(), [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) { DependencyScanningAction Action( - WorkingDirectory, Consumer, DepFS, PPSkipMappings, - Format, OptimizeArgs, ModuleName); + WorkingDirectory, Consumer, DepFS, Format, + OptimizeArgs, ModuleName); // Create an invocation that uses the underlying file // system to ensure that any file system requests that // are made by the driver do not go through the diff --git a/clang/test/ClangScanDeps/has_include_if_elif.cpp b/clang/test/ClangScanDeps/has_include_if_elif.cpp --- a/clang/test/ClangScanDeps/has_include_if_elif.cpp +++ b/clang/test/ClangScanDeps/has_include_if_elif.cpp @@ -10,7 +10,7 @@ // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header4.h // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/has_include_if_elif.json > %t.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ // RUN: FileCheck %s diff --git a/clang/test/ClangScanDeps/macro-expansions.cpp b/clang/test/ClangScanDeps/macro-expansions.cpp new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/macro-expansions.cpp @@ -0,0 +1,37 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json | FileCheck %s + +// CHECK: test.o: +// CHECK-NEXT: test.cpp +// CHECK-NEXT: header1.h +// CHECK-NEXT: header2.h + +//--- cdb.json.template +[{ + "directory" : "DIR", + "command" : "clang -c DIR/test.cpp -o DIR/test.o", + "file" : "DIR/test.o" +}] + +//--- test.cpp +#define FN_MACRO(x) 1 +#if FN_MACRO(a) +#include "header1.h" +#endif + +#if __has_cpp_attribute(clang::fallthrough) +#include "header2.h" +#endif + +//--- header1.h +#ifndef _HEADER1_H_ +#define _HEADER1_H_ +#endif + +//--- header2.h +#ifndef _HEADER2_H_ +#define _HEADER2_H_ +#endif diff --git a/clang/test/ClangScanDeps/modulemap-via-vfs.m b/clang/test/ClangScanDeps/modulemap-via-vfs.m --- a/clang/test/ClangScanDeps/modulemap-via-vfs.m +++ b/clang/test/ClangScanDeps/modulemap-via-vfs.m @@ -3,7 +3,7 @@ // RUN: sed -e "s|DIR|%/t.dir|g" 
%t.dir/build/compile-commands.json.in > %t.dir/build/compile-commands.json // RUN: sed -e "s|DIR|%/t.dir|g" %t.dir/build/vfs.yaml.in > %t.dir/build/vfs.yaml // RUN: clang-scan-deps -compilation-database %t.dir/build/compile-commands.json -j 1 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -generate-modules-path-args > %t.db +// RUN: -mode preprocess-directives-scan -generate-modules-path-args > %t.db // RUN: %deps-to-rsp %t.db --module-name=A > %t.A.cc1.rsp // RUN: cat %t.A.cc1.rsp | sed 's:\\\\\?:/:g' | FileCheck %s diff --git a/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m b/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m --- a/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m +++ b/clang/test/ClangScanDeps/modules-fmodule-name-no-module-built.m @@ -10,7 +10,7 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/module_fmodule_name_cdb.json > %t.cdb // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-directives-scan > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK %s #import "header3.h" diff --git a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp --- a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp +++ b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp @@ -12,11 +12,11 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl_by_mod_name.json > %t_clangcl.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -module-name=header1 > %t.result +// RUN: -mode preprocess-directives-scan -module-name=header1 > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK %s // // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -module-name=header1 > %t_clangcl.result +// RUN: -mode preprocess-directives-scan -module-name=header1 > %t_clangcl.result // RUN: cat %t_clangcl.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK %s // CHECK: { diff --git a/clang/test/ClangScanDeps/modules-full.cpp b/clang/test/ClangScanDeps/modules-full.cpp --- a/clang/test/ClangScanDeps/modules-full.cpp +++ b/clang/test/ClangScanDeps/modules-full.cpp @@ -11,20 +11,20 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources > %t.result +// RUN: -mode preprocess-directives-scan > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-NO-ABS %s // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-directives-scan > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-ABS %s // // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -format experimental-full \ // RUN: -generate-modules-path-args -module-files-dir %t.dir/custom \ -// RUN: -mode 
preprocess-minimized-sources > %t.result +// RUN: -mode preprocess-directives-scan > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-CUSTOM %s // // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 4 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources > %t_clangcl.result +// RUN: -mode preprocess-directives-scan > %t_clangcl.result // RUN: cat %t_clangcl.result | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t.dir --check-prefixes=CHECK,CHECK-NO-ABS %s #include "header.h" diff --git a/clang/test/ClangScanDeps/modules-inferred-explicit-build.m b/clang/test/ClangScanDeps/modules-inferred-explicit-build.m --- a/clang/test/ClangScanDeps/modules-inferred-explicit-build.m +++ b/clang/test/ClangScanDeps/modules-inferred-explicit-build.m @@ -6,7 +6,7 @@ // RUN: %S/Inputs/modules_inferred_cdb.json > %t.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -mode preprocess-minimized-sources -generate-modules-path-args > %t.db +// RUN: -mode preprocess-directives-scan -generate-modules-path-args > %t.db // RUN: %deps-to-rsp %t.db --module-name=Inferred > %t.inferred.cc1.rsp // RUN: %deps-to-rsp %t.db --module-name=System > %t.system.cc1.rsp // RUN: %deps-to-rsp %t.db --tu-index=0 > %t.tu.rsp diff --git a/clang/test/ClangScanDeps/modules-inferred.m b/clang/test/ClangScanDeps/modules-inferred.m --- a/clang/test/ClangScanDeps/modules-inferred.m +++ b/clang/test/ClangScanDeps/modules-inferred.m @@ -6,7 +6,7 @@ // RUN: %/S/Inputs/modules_inferred_cdb.json > %t.cdb // // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -format experimental-full \ -// RUN: -generate-modules-path-args -mode preprocess-minimized-sources > %t.result +// RUN: -generate-modules-path-args -mode preprocess-directives-scan > %t.result // RUN: cat %t.result | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t.dir -DSOURCEDIR=%/S --check-prefixes=CHECK #include diff --git a/clang/test/ClangScanDeps/modules.cpp b/clang/test/ClangScanDeps/modules.cpp --- a/clang/test/ClangScanDeps/modules.cpp +++ b/clang/test/ClangScanDeps/modules.cpp @@ -13,9 +13,9 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb.json > %t.cdb // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s // // The output order is non-deterministic when using more than one thread, @@ -23,17 +23,17 @@ // as it might fail if the results for `modules_cdb_input.cpp` are reported before // `modules_cdb_input2.cpp`. 
// -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK2 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK2 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK2 %s diff --git a/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp --- a/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp +++ b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp @@ -11,7 +11,7 @@ // RUN: touch %t.dir/Inputs/c_alias.h // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/preprocess_minimized_pragmas_cdb.json > %t.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ // RUN: FileCheck %s diff --git a/clang/test/ClangScanDeps/regular_cdb.cpp b/clang/test/ClangScanDeps/regular_cdb.cpp --- a/clang/test/ClangScanDeps/regular_cdb.cpp +++ b/clang/test/ClangScanDeps/regular_cdb.cpp @@ -10,9 +10,9 @@ // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb_clangcl.json > %t_clangcl.cdb // -// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \ @@ -31,9 +31,9 @@ // as it might fail if the results for `regular_cdb_input.cpp` are reported before // `regular_cdb_input2.cpp`. 
// -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK1 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ @@ -41,9 +41,9 @@ // RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \ // RUN: FileCheck --check-prefix=CHECK1 %s -// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK2 %s -// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \ +// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-directives-scan | \ // RUN: FileCheck --check-prefix=CHECK2 %s // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \ diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -1,3 +1,4 @@ -// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 +// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s -#define 0 0 // expected-error {{macro name must be an identifier}} +#define 0 0 +// CHECK: #define 0 0 diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c @@ -15,7 +15,7 @@ #pragma include_alias(, "mystring.h") // CHECK: #pragma once -// CHECK-NEXT: #pragma push_macro( "MYMACRO" ) +// CHECK-NEXT: #pragma push_macro("MYMACRO") // CHECK-NEXT: #pragma pop_macro("MYMACRO") // CHECK-NEXT: #pragma clang module import mymodule -// CHECK-NEXT: #pragma include_alias(, "mystring.h") +// CHECK-NEXT: #pragma include_alias(,"mystring.h") diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -116,15 +116,15 @@ "mode", llvm::cl::desc("The preprocessing mode used to compute the dependencies"), llvm::cl::values( - clEnumValN(ScanningMode::MinimizedSourcePreprocessing, - "preprocess-minimized-sources", - "The set of dependencies is computed by preprocessing the " - "source files that were minimized to only include the " - "contents that might affect the dependencies"), + clEnumValN(ScanningMode::DependencyDirectivesScan, + "preprocess-directives-scan", + "The set of dependencies is computed by preprocessing with " + "special lexing after scanning the source files to get the " + "directives that might affect the dependencies"), clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess", "The set of dependencies is 
computed by preprocessing the " - "unmodified source files")), - llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing), + "source files")), + llvm::cl::init(ScanningMode::DependencyDirectivesScan), llvm::cl::cat(DependencyScannerCategory)); static llvm::cl::opt Format( diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp --- a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -6,45 +6,65 @@ // //===----------------------------------------------------------------------===// -#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/SmallString.h" #include "gtest/gtest.h" using namespace llvm; using namespace clang; -using namespace clang::minimize_source_to_dependency_directives; +using namespace clang::dependency_directives_scan; -namespace clang { +static bool minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl &Out, + SmallVectorImpl &Tokens, + SmallVectorImpl &Directives) { + Out.clear(); + Tokens.clear(); + Directives.clear(); + if (scanSourceForDependencyDirectives(Input, Tokens, Directives)) + return true; -bool minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out) { - SmallVector Tokens; - return minimizeSourceToDependencyDirectives(Input, Out, Tokens); + raw_svector_ostream OS(Out); + printDependencyDirectivesAsSource(Input, Directives, OS); + Out.push_back('\0'); + Out.pop_back(); + + return false; } -} // end namespace clang +static bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Tokens; + SmallVector Directives; + return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives); +} namespace { TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { SmallVector Out; - SmallVector Tokens; + SmallVector Tokens; + SmallVector Directives; - ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("", Out, Tokens, Directives)); EXPECT_TRUE(Out.empty()); - ASSERT_EQ(1u, Tokens.size()); - ASSERT_EQ(pp_eof, Tokens.back().K); + EXPECT_TRUE(Tokens.empty()); + ASSERT_EQ(1u, Directives.size()); + ASSERT_EQ(pp_eof, Directives.back().Kind); - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens, + Directives)); EXPECT_TRUE(Out.empty()); - ASSERT_EQ(1u, Tokens.size()); - ASSERT_EQ(pp_eof, Tokens.back().K); + EXPECT_TRUE(Tokens.empty()); + ASSERT_EQ(1u, Directives.size()); + ASSERT_EQ(pp_eof, Directives.back().Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { SmallVector Out; - SmallVector Tokens; + SmallVector Tokens; + SmallVector Directives; ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define A\n" @@ -68,41 +88,42 @@ "#pragma include_alias(, )\n" "export module m;\n" "import m;\n", - Out, Tokens)); - EXPECT_EQ(pp_define, Tokens[0].K); - EXPECT_EQ(pp_undef, Tokens[1].K); - EXPECT_EQ(pp_endif, Tokens[2].K); - EXPECT_EQ(pp_if, Tokens[3].K); - EXPECT_EQ(pp_ifdef, Tokens[4].K); - EXPECT_EQ(pp_ifndef, Tokens[5].K); - EXPECT_EQ(pp_elifdef, Tokens[6].K); - EXPECT_EQ(pp_elifndef, Tokens[7].K); - EXPECT_EQ(pp_elif, Tokens[8].K); - EXPECT_EQ(pp_else, Tokens[9].K); - EXPECT_EQ(pp_include, Tokens[10].K); - EXPECT_EQ(pp_include_next, Tokens[11].K); - 
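The rewritten unit-test helper above reflects the new two-step API: scanSourceForDependencyDirectives fills token and directive vectors, and printDependencyDirectivesAsSource renders the recorded directives back as source text. The following is a self-contained sketch of that flow; the function name, vector sizes, and error handling are illustrative, not taken from the patch.

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace clang::dependency_directives_scan;

// Scan a buffer for dependency-relevant directives and print them back as
// source. Returns true on a scan error, mirroring the test helper above.
static bool dumpDependencyDirectives(StringRef Input, llvm::raw_ostream &OS) {
  SmallVector<Token, 64> Tokens;         // tokens of every kept directive
  SmallVector<Directive, 32> Directives; // one entry per recognized directive
  if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
    return true; // pass a DiagnosticsEngine to get a diagnostic instead
  printDependencyDirectivesAsSource(Input, Directives, OS);
  return false;
}

As the updated Empty test shows, an input with no directives yields an empty token vector but still terminates the directive list with a pp_eof entry.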
EXPECT_EQ(pp___include_macros, Tokens[12].K); - EXPECT_EQ(pp_import, Tokens[13].K); - EXPECT_EQ(decl_at_import, Tokens[14].K); - EXPECT_EQ(pp_pragma_import, Tokens[15].K); - EXPECT_EQ(pp_pragma_push_macro, Tokens[16].K); - EXPECT_EQ(pp_pragma_pop_macro, Tokens[17].K); - EXPECT_EQ(pp_pragma_include_alias, Tokens[18].K); - EXPECT_EQ(cxx_export_decl, Tokens[19].K); - EXPECT_EQ(cxx_module_decl, Tokens[20].K); - EXPECT_EQ(cxx_import_decl, Tokens[21].K); - EXPECT_EQ(pp_eof, Tokens[22].K); + Out, Tokens, Directives)); + EXPECT_EQ(pp_define, Directives[0].Kind); + EXPECT_EQ(pp_undef, Directives[1].Kind); + EXPECT_EQ(pp_endif, Directives[2].Kind); + EXPECT_EQ(pp_if, Directives[3].Kind); + EXPECT_EQ(pp_ifdef, Directives[4].Kind); + EXPECT_EQ(pp_ifndef, Directives[5].Kind); + EXPECT_EQ(pp_elifdef, Directives[6].Kind); + EXPECT_EQ(pp_elifndef, Directives[7].Kind); + EXPECT_EQ(pp_elif, Directives[8].Kind); + EXPECT_EQ(pp_else, Directives[9].Kind); + EXPECT_EQ(pp_include, Directives[10].Kind); + EXPECT_EQ(pp_include_next, Directives[11].Kind); + EXPECT_EQ(pp___include_macros, Directives[12].Kind); + EXPECT_EQ(pp_import, Directives[13].Kind); + EXPECT_EQ(decl_at_import, Directives[14].Kind); + EXPECT_EQ(pp_pragma_import, Directives[15].Kind); + EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind); + EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind); + EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind); + EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind); + EXPECT_EQ(cxx_import_decl, Directives[20].Kind); + EXPECT_EQ(pp_eof, Directives[21].Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { SmallVector Out; - SmallVector Tokens; + SmallVector Tokens; + SmallVector Directives; - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens)); - EXPECT_STREQ("#define MACRO\n", Out.data()); - ASSERT_EQ(2u, Tokens.size()); - ASSERT_EQ(pp_define, Tokens.front().K); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out, + Tokens, Directives)); + EXPECT_STREQ("#define MACRO", Out.data()); + ASSERT_EQ(4u, Tokens.size()); + ASSERT_EQ(2u, Directives.size()); + ASSERT_EQ(pp_define, Directives.front().Kind); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) { @@ -129,37 +150,37 @@ SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out)); - EXPECT_STREQ("#define MACRO()\n", Out.data()); + EXPECT_STREQ("#define MACRO()", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out)); - EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b...)", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO content", Out)); - EXPECT_STREQ("#define MACRO content\n", Out.data()); + EXPECT_STREQ("#define MACRO content", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO con tent ", Out)); - EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO() con tent ", Out)); - EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO()con tent", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO((a))", Out.data()); 
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(a*b)", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { @@ -167,19 +188,19 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\t)\tcon \t tent\t", Out)); - EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + EXPECT_STREQ("#define MACRO()con tent", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\f)\fcon \f tent\f", Out)); - EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + EXPECT_STREQ("#define MACRO()con tent", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\v)\vcon \v tent\v", Out)); - EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + EXPECT_STREQ("#define MACRO()con tent", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out)); - EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) { @@ -189,7 +210,7 @@ minimizeSourceToDependencyDirectives("#define MACRO(a \\\n" " )", Out)); - EXPECT_STREQ("#define MACRO(a)\n", Out.data()); + EXPECT_STREQ("#define MACRO(a)", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a, \\\n" @@ -197,7 +218,7 @@ " call((a), \\\n" " (b))", Out)); - EXPECT_STREQ("#define MACRO(a,b) call((a), (b))\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b)call((a),(b))", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -210,7 +231,7 @@ " call((a), \\\r" " (b))", Out)); - EXPECT_STREQ("#define MACRO(a,b) call((a), (b))\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b)call((a),(b))", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgsStringize) { @@ -220,7 +241,7 @@ " #a \\\n" " #b", Out)); - EXPECT_STREQ("#define MACRO(a,b) #a #b\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b) #a #b", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -233,7 +254,7 @@ " call((a), \\\r\n" " (b))", Out)); - EXPECT_STREQ("#define MACRO(a,b) call((a), (b))\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b)call((a),(b))", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -246,31 +267,33 @@ " call((a), \\\n\r" " (b))", Out)); - EXPECT_STREQ("#define MACRO(a,b) call((a), (b))\n", Out.data()); + EXPECT_STREQ("#define MACRO(a,b)call((a),(b))", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND&\n", Out.data()); 
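The assertions flipped from ASSERT_TRUE to ASSERT_FALSE just above (DefineNumber, DefineNoName, and the relaxed macro-argument cases) show that the scanner no longer rejects malformed #define directives; it records their tokens verbatim and leaves the diagnostics to the real preprocessor. A hedged, self-contained illustration of that contract follows; the test name is hypothetical.

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/SmallVector.h"
#include "gtest/gtest.h"

using namespace clang;
using namespace clang::dependency_directives_scan;

// Sketch: a macro name that is not an identifier is no longer a scan error;
// the directive text is preserved for the downstream compilation to diagnose.
TEST(DependencyDirectivesScanSketch, MalformedDefineIsNotAScanError) {
  SmallVector<Token, 8> Tokens;
  SmallVector<Directive, 4> Directives;
  ASSERT_FALSE(
      scanSourceForDependencyDirectives("#define 0\n", Tokens, Directives));
  EXPECT_FALSE(Directives.empty()); // at minimum the trailing pp_eof entry
}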
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" "&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND\\\n" + "&\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) { @@ -422,11 +445,11 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#pragma include_alias(\"A\", \"B\")\n", Out)); - EXPECT_STREQ("#pragma include_alias(\"A\", \"B\")\n", Out.data()); + EXPECT_STREQ("#pragma include_alias(\"A\",\"B\")\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#pragma include_alias(, )\n", Out)); - EXPECT_STREQ("#pragma include_alias(, )\n", Out.data()); + EXPECT_STREQ("#pragma include_alias(,)\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out)); EXPECT_STREQ("", Out.data()); @@ -478,34 +501,38 @@ ASSERT_FALSE( minimizeSourceToDependencyDirectives("#__include_macros \n", Out)); EXPECT_STREQ("#__include_macros \n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out)); + EXPECT_STREQ("#include MACRO\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out)); - EXPECT_STREQ("@import A;\n", Out.data()); + EXPECT_STREQ("@import A;", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out)); - EXPECT_STREQ("@import A;\n", Out.data()); + EXPECT_STREQ("@import A;", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); - EXPECT_STREQ("@import A;\n", Out.data()); + EXPECT_STREQ("@import A;", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); - EXPECT_STREQ("@import A.B;\n", Out.data()); + EXPECT_STREQ("@import A.B;", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "@import /*x*/ A /*x*/ . 
/*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out)); - EXPECT_STREQ("@import A.B;\n", Out.data()); + EXPECT_STREQ("@import A.B;", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) { SmallVector Out; ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { @@ -556,7 +583,8 @@ "#define GUARD\n" "#endif\n", Out)); - EXPECT_STREQ("#ifndef GUARD\n" + EXPECT_STREQ("#if\\\n" + "ndef GUARD\n" "#define GUARD\n" "#endif\n", Out.data()); @@ -564,12 +592,16 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\n" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\r" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" " RD\n", @@ -585,7 +617,10 @@ "2 + \\\t\n" "3\n", Out)); - EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data()); + EXPECT_STREQ("#define A 1+\\ \n" + "2+\\\t\n" + "3\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { @@ -679,18 +714,19 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { SmallVector Out; - SmallVector Tokens; + SmallVector Tokens; + SmallVector Directives; StringRef Source = R"(// comment #pragma once // another comment #include )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); EXPECT_STREQ("#pragma once\n#include \n", Out.data()); - ASSERT_EQ(Tokens.size(), 3u); - EXPECT_EQ(Tokens[0].K, - minimize_source_to_dependency_directives::pp_pragma_once); + ASSERT_EQ(Directives.size(), 3u); + EXPECT_EQ(Directives[0].Kind, pp_pragma_once); Source = R"(// comment #pragma once extra tokens @@ -698,7 +734,7 @@ #include )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#pragma once\n#include \n", Out.data()); + EXPECT_STREQ("#pragma once extra tokens\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -753,12 +789,13 @@ Source = "#define X \"\\ \r\nx\n#include \n"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X \"\\ \r\nx\n#include \n", Out.data()); + EXPECT_STREQ("#define X\"\\ \r\nx\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; - SmallVector Tokens; + SmallVector Tokens; + SmallVector Directives; StringRef Source = R"( module; @@ -787,82 +824,17 @@ import f(->a = 3); } )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens)); - EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n" - "export import :l [[rename]];\n" - "import <<= 3;\nimport a b d e d e f e;\n" - "import foo [[no_unique_address]];\nimport foo();\n" - "import f(:sefse);\nimport f(->a = 3);\n", Out.data()); - ASSERT_EQ(Tokens.size(), 12u); - EXPECT_EQ(Tokens[0].K, - minimize_source_to_dependency_directives::pp_include); - EXPECT_EQ(Tokens[2].K, - 
minimize_source_to_dependency_directives::cxx_module_decl); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { - SmallString<128> Out; - SmallVector Toks; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); - EXPECT_EQ(Ranges.size(), 1u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { - SmallString<128> Out; - SmallVector Toks; - StringRef Source = "#ifdef BLAH\n" - "void skip();\n" - "#elifdef BLAM\n" - "void skip();\n" - "#elifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); - EXPECT_EQ(Ranges.size(), 3u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset + Ranges[2].Length, (int)Out.rfind("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { - SmallString<128> Out; - SmallVector Toks; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "#if FOO\n" - "#include hello\n" - "#elif BAR\n" - "#include bye\n" - "#endif\n" - "#else\n" - "#include nothing\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Toks)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); - EXPECT_EQ(Ranges.size(), 4u); - EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); - EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif")); - EXPECT_EQ(Ranges[2].Offset, 0); - EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif")); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); + EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" + "exp\\\nort import:l[[rename]];" + "import<<=3;import a b d e d e f e;" + "import foo[[no_unique_address]];import foo();" + "import f(:sefse);import f(->a=3);", + Out.data()); + ASSERT_EQ(Directives.size(), 10u); + EXPECT_EQ(Directives[0].Kind, pp_include); + EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); } } // end anonymous namespace diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -204,53 +204,5 @@ EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h"); } -namespace dependencies { -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem 
DepFS(SharedCache, VFS, Mappings); - - DepFS.enableMinimizationOfAllFiles(); // Let's be explicit for clarity. - auto StatusMinimized0 = DepFS.status("/mod.h"); - DepFS.disableMinimization("/mod.h"); - auto StatusFull1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusMinimized0); - EXPECT_TRUE(StatusFull1); - EXPECT_EQ(StatusMinimized0->getSize(), 17u); - EXPECT_EQ(StatusFull1->getSize(), 30u); - EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h")); -} - -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.disableMinimization("/mod.h"); - auto StatusFull0 = DepFS.status("/mod.h"); - DepFS.enableMinimizationOfAllFiles(); - auto StatusMinimized1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusFull0); - EXPECT_TRUE(StatusMinimized1); - EXPECT_EQ(StatusFull0->getSize(), 30u); - EXPECT_EQ(StatusMinimized1->getSize(), 17u); - EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h")); -} - -} // end namespace dependencies } // end namespace tooling } // end namespace clang
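Taken together, the updated unit tests exercise a two-level result: each Directive carries a DirectiveKind plus the slice of Tokens it spans, and each Token refers back into the original buffer by offset and length. Below is a sketch of consuming that structure directly; the output format and vector sizes are chosen for illustration only.

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace clang::dependency_directives_scan;

// Illustrative walk over scan results: print each directive's kind and the
// original spelling of its tokens, recovered from the input buffer without
// re-lexing.
static void listDirectives(StringRef Input) {
  SmallVector<Token, 64> Tokens;
  SmallVector<Directive, 32> Directives;
  if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
    return; // scan error; a DiagnosticsEngine could report it instead
  for (const Directive &D : Directives) {
    llvm::outs() << "kind=" << unsigned(D.Kind)
                 << " tokens=" << D.Tokens.size() << "\n";
    for (const Token &T : D.Tokens)
      llvm::outs() << "  '" << Input.substr(T.Offset, T.Length) << "'\n";
  }
}

Keeping offsets into the unmodified source, rather than a rewritten "minimized" buffer, is what lets the expectations above preserve line continuations, comments-free spellings, and original whitespace exactly as written.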