Index: include/clang/Basic/FileManager.h =================================================================== --- include/clang/Basic/FileManager.h +++ include/clang/Basic/FileManager.h @@ -176,6 +176,8 @@ IntrusiveRefCntPtr FS = nullptr); ~FileManager(); + void setFileSystemOpts(const FileSystemOptions &FSO) { FileSystemOpts = FSO; } + /// Installs the provided FileSystemStatCache object within /// the FileManager. /// Index: include/clang/Basic/VirtualFileSystem.h =================================================================== --- include/clang/Basic/VirtualFileSystem.h +++ include/clang/Basic/VirtualFileSystem.h @@ -71,6 +71,7 @@ static Status copyWithNewName(const Status &In, StringRef NewName); static Status copyWithNewName(const llvm::sys::fs::file_status &In, StringRef NewName); + static Status copyWithNewSize(const Status &In, uint64_t Size); /// Returns the name that should be used for this file or directory. StringRef getName() const { return Name; } Index: include/clang/Frontend/CompilerInstance.h =================================================================== --- include/clang/Frontend/CompilerInstance.h +++ include/clang/Frontend/CompilerInstance.h @@ -46,6 +46,7 @@ class FrontendAction; class MemoryBufferCache; class Module; +class PPCallbacks; class Preprocessor; class Sema; class SourceManager; @@ -127,6 +128,10 @@ /// The module provider. std::shared_ptr ThePCHContainerOperations; + /// A dependency file generator that should be used instead of the default + /// file writer. + std::unique_ptr OverridenDepFileGenerator; + /// The dependency file generator. std::unique_ptr TheDependencyFileGenerator; @@ -186,6 +191,9 @@ /// Force an output buffer. std::unique_ptr OutputStream; + /// Additional PPCallbacks to add to the created preprocessor. 
+ std::unique_ptr AdditionalPPCallbacks; + CompilerInstance(const CompilerInstance &) = delete; void operator=(const CompilerInstance &) = delete; public: @@ -253,6 +261,15 @@ BuildGlobalModuleIndex = Build; } + /// Set the additional PP callbacks that will be added to the created + /// preprocessor. + void setAdditionalPPCallbacks(std::unique_ptr PPC); + + /// Set the overriden dependency file generator that will used by the + /// created file dependency collector instead of the default one. + void setOverridenDepFileGenerator( + std::unique_ptr Gen); + /// } /// @name Forwarding Methods /// { Index: include/clang/Frontend/CompilerInvocation.h =================================================================== --- include/clang/Frontend/CompilerInvocation.h +++ include/clang/Frontend/CompilerInvocation.h @@ -22,6 +22,7 @@ #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/Option/OptTable.h" #include #include @@ -147,9 +148,10 @@ /// \param ArgEnd - The last element in the argument vector. /// \param Diags - The diagnostic engine to use for errors. static bool CreateFromArgs(CompilerInvocation &Res, - const char* const *ArgBegin, - const char* const *ArgEnd, - DiagnosticsEngine &Diags); + const char *const *ArgBegin, + const char *const *ArgEnd, + DiagnosticsEngine &Diags, + llvm::opt::OptTable *Options = nullptr); /// Get the directory where the compiler headers /// reside, relative to the compiler binary (found by the passed in Index: include/clang/Frontend/Utils.h =================================================================== --- include/clang/Frontend/Utils.h +++ include/clang/Frontend/Utils.h @@ -112,6 +112,13 @@ std::vector Dependencies; }; +/// Builds a dependency consumer. 
+class DependencyFileGeneratorConsumer { +public: + virtual ~DependencyFileGeneratorConsumer() {} + virtual void sawDependency(StringRef Filename) = 0; +}; + /// Builds a depdenency file when attached to a Preprocessor (for includes) and /// ASTReader (for module imports), and writes it out at the end of processing /// a source file. Users should attach to the ast reader whenever a module is @@ -123,7 +130,8 @@ public: static DependencyFileGenerator *CreateAndAttachToPreprocessor( - Preprocessor &PP, const DependencyOutputOptions &Opts); + Preprocessor &PP, const DependencyOutputOptions &Opts, + DependencyFileGeneratorConsumer *Consumer = nullptr); void AttachToASTReader(ASTReader &R); }; Index: include/clang/Lex/FilterToIncludes.h =================================================================== --- /dev/null +++ include/clang/Lex/FilterToIncludes.h @@ -0,0 +1,95 @@ +//===- clang/Lex/FilterToIncludes.h - Filter down to just includes -*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the interface for filtering header and source files to the +/// minimum necessary for evaluating includes. It leaves behind #define, +/// #include, #import, @import, and any conditional preprocessor logic that +/// contains one of those. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_FILTER_TO_INCLUDES_H
+#define LLVM_CLANG_LEX_FILTER_TO_INCLUDES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace filter_to_includes {
+
+/// The kinds of directive tracked by the filter.
+///
+/// pp_none is the default kind of a Token; pp_at_import and pp_pragma_import
+/// stand for "@import" and "#pragma clang module import" respectively, and
+/// the remaining enumerators are named after the preprocessor directive they
+/// represent.
+enum TokenKind {
+  pp_none,
+  pp_include,
+  pp___include_macros,
+  pp_define,
+  pp_undef,
+  pp_import,
+  pp_at_import,
+  pp_pragma_import,
+  pp_include_next,
+  pp_if,
+  pp_ifdef,
+  pp_ifndef,
+  pp_elif,
+  pp_else,
+  pp_endif,
+  pp_eof,
+};
+
+/// Simplified token to track the location of various directives.
+struct Token {
+  /// The kind of token.
+  TokenKind K = pp_none;
+
+  /// Offset into the output byte stream of where the directive begins.
+  int Offset = -1;
+
+  Token(TokenKind K, int Offset) : K(K), Offset(Offset) {}
+};
+
+/// Simplified token range to track the range of a potentially skippable PP
+/// directive.
+struct SkippedRange {
+  /// Offset into the output byte stream of where the skipped directive begins.
+  int Offset;
+
+  /// The number of bytes that can be skipped before the preprocessing must
+  /// resume.
+  int Length;
+};
+
+/// Computes the skippable ranges for a sequence of tokens.
+///
+/// NOTE(review): the element types of \p Input and \p Range were lost in
+/// extraction; presumably they are Token and SkippedRange -- confirm.
+///
+/// \returns false on success, true on error.
+bool computeSkippedRanges(llvm::ArrayRef Input,
+                          llvm::SmallVectorImpl &Range);
+
+} // end namespace filter_to_includes
+
+/// Filter the input down to just the includes and macro definitions.
+///
+/// Simplify the input down to the minimum necessary to evaluate include
+/// dependencies. Delete all non-preprocessor code, and strip anything that
+/// can't affect what gets included. Canonicalize whitespace where convenient
+/// to stabilize the output against formatting changes in the input.
+///
+/// Clears the output vectors at the beginning of the call.
+///
+/// \returns false on success, true on error.
+///{
+bool filterToIncludes(llvm::StringRef Input,
+                      llvm::SmallVectorImpl &Output,
+                      llvm::SmallVectorImpl &Tokens);
+bool filterToIncludes(llvm::StringRef Input,
+                      llvm::SmallVectorImpl &Output);
+///}
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LEX_FILTER_TO_INCLUDES_H
Index: include/clang/Lex/Lexer.h
===================================================================
--- include/clang/Lex/Lexer.h
+++ include/clang/Lex/Lexer.h
@@ -200,6 +200,18 @@
     return BufferPtr == BufferEnd;
   }
 
+  /// Returns the current lexing offset.
+  unsigned getCurrentBufferOffset();
+
+  /// Skip over \p NumBytes bytes.
+  ///
+  /// If the skip is successful, the next token will be lexed from the new
+  /// offset. The lexer also assumes that we skipped to the start of the line.
+  ///
+  /// \returns true if the skip failed (new offset would have been past the
+  /// end of the buffer), false otherwise.
+  bool skipOver(unsigned NumBytes);
+
   /// isKeepWhitespaceMode - Return true if the lexer should return tokens for
   /// every character in the file, including whitespace and comments. This
   /// should only be used in raw mode, as the preprocessor is not prepared to
Index: include/clang/Lex/PPCallbacks.h
===================================================================
--- include/clang/Lex/PPCallbacks.h
+++ include/clang/Lex/PPCallbacks.h
@@ -28,6 +28,7 @@
   class MacroDefinition;
   class MacroDirective;
   class MacroArgs;
+  class Preprocessor;
 
   /// This interface provides a way to observe the actions of the
   /// preprocessor as it does its thing.
@@ -41,6 +42,11 @@
     EnterFile, ExitFile, SystemHeaderPragma, RenameFile
   };
 
+  /// Hook that lets a client describe a skippable range for an excluded
+  /// conditional block.
+  ///
+  /// \param PP The preprocessor making the query.
+  /// \param HashLoc Presumably the location of the '#' that starts the
+  /// excluded conditional directive -- TODO confirm against the caller.
+  /// \param CurLexerBufferOffset Presumably the current lexer's offset into
+  /// its buffer -- TODO confirm against the caller.
+  ///
+  /// \returns None by default, meaning this callback has nothing to offer.
+  /// NOTE(review): the Optional's value type was lost in extraction;
+  /// presumably it is the number of bytes the lexer may skip -- confirm.
+  virtual Optional getSkippedRangeForExcludedConditionalBlock(Preprocessor &PP, SourceLocation HashLoc, unsigned CurLexerBufferOffset) {
+    return None;
+  }
+
   /// Callback invoked whenever a source file is entered or exited.
   ///
   /// \param Loc Indicates the new location.
@@ -354,6 +360,16 @@ std::unique_ptr _Second) : First(std::move(_First)), Second(std::move(_Second)) {} + Optional getSkippedRangeForExcludedConditionalBlock( + Preprocessor &PP, SourceLocation HashLoc, + unsigned CurLexerBufferOffset) override { + if (auto I = First->getSkippedRangeForExcludedConditionalBlock( + PP, HashLoc, CurLexerBufferOffset)) + return I; + return Second->getSkippedRangeForExcludedConditionalBlock( + PP, HashLoc, CurLexerBufferOffset); + } + void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override { Index: include/clang/Tooling/ScanDeps/DependencyScanner.h =================================================================== --- /dev/null +++ include/clang/Tooling/ScanDeps/DependencyScanner.h @@ -0,0 +1,59 @@ +//===- DependencyScanner.h - Fast clang-scan-deps ============---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SCAN_DEPS_DEPENDENCY_SCANNER_H +#define LLVM_CLANG_SCAN_DEPS_DEPENDENCY_SCANNER_H + +#include "clang/Basic/LLVM.h" +#include "llvm/Support/Error.h" +#include + +namespace clang { +namespace scan_deps { + +class DependencyScannerServiceImpl; +class ThreadLocalPersistentCompilerInstance; + +/// Represents an instance of a dependency scanner service. +/// +/// The dependency scanner service is a global instance that is owns the +/// global cache and other global state that's shared between the dependency +/// scanner workers. 
+class DependencyScannerService {
+public:
+  DependencyScannerService();
+  ~DependencyScannerService();
+
+  /// Prints the service's statistics to \p OS.
+  void printStatistics(llvm::raw_ostream &OS) const;
+
+private:
+  /// Opaque implementation object; its type is only forward-declared here.
+  std::unique_ptr Impl;
+  // Workers are befriended so that they can reach the implementation.
+  friend class DependencyScannerWorker;
+};
+
+/// An individual dependency scanner worker that should run on its own thread.
+class DependencyScannerWorker {
+public:
+  /// Creates a worker that is associated with \p Service.
+  DependencyScannerWorker(const DependencyScannerService &Service);
+  ~DependencyScannerWorker();
+
+  /// Returns the set of files that are required when invoking a particular
+  /// compiler invocation.
+  ///
+  /// NOTE(review): the template arguments of the return type and of
+  /// \p Compilation were lost in extraction; presumably a list of file names
+  /// and the compiler command-line arguments respectively -- confirm.
+  llvm::Expected>
+  getFileDependencies(ArrayRef Compilation,
+                      StringRef WorkingDirectory);
+
+private:
+  /// Per-worker state; the pointee type was lost in extraction.
+  std::unique_ptr CI;
+};
+
+} // end namespace scan_deps
+} // end namespace clang
+
+#endif // LLVM_CLANG_SCAN_DEPS_DEPENDENCY_SCANNER_H
Index: include/clang/Tooling/ScanDeps/PPRangeSkipping.h
===================================================================
--- /dev/null
+++ include/clang/Tooling/ScanDeps/PPRangeSkipping.h
@@ -0,0 +1,86 @@
+//===- PPRangeSkipping.h - PP Callbacks to skip inactive range --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SCAN_DEPS_PP_RANGE_SKIPPING_H
+#define LLVM_CLANG_SCAN_DEPS_PP_RANGE_SKIPPING_H
+
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "llvm/ADT/DenseMap.h"
+#include
+
+namespace clang {
+namespace scan_deps {
+
+/// A mapping from an offset into a buffer to the number of bytes that can be
+/// skipped by the preprocessor when skipping over inactive preprocessor ranges.
+using SkippedRangeMapping = llvm::DenseMap;
+
+/// Contains the currently active skipped range mappings for a particular
+/// preprocessor instance.
+class PreprocessorSkippedMappings {
+public:
+  /// Clears the buffer -> skipped range mappings.
+  void reset() { MappingForBuffer.clear(); }
+
+  /// Inserts a new mapping that maps from the specified memory buffer to the
+  /// specified skipped range mappings.
+  ///
+  /// Only the pointer is stored, and an existing entry for \p Buf is
+  /// overwritten.
+  void setSkippedRanges(const llvm::MemoryBuffer *Buf,
+                        const SkippedRangeMapping *Mapping) {
+    MappingForBuffer[Buf] = Mapping;
+  }
+
+  /// Returns the skipped range mappings for the given buffer or null if none
+  /// exist.
+  const SkippedRangeMapping *
+  getSkippedRanges(const llvm::MemoryBuffer *Buf) const {
+    auto It = MappingForBuffer.find(Buf);
+    if (It != MappingForBuffer.end())
+      return It->getSecond();
+    return nullptr;
+  }
+
+private:
+  /// Buffer -> mapping table, keyed by the buffer's address.
+  llvm::DenseMap
+      MappingForBuffer;
+};
+
+/// A PP callbacks instance that implements fast PP skipping.
+class PPRangeSkippingCallbacks final : public PPCallbacks {
+public:
+  /// Stores a reference to \p BufferMappings; the caller must keep it alive
+  /// for as long as this object is used.
+  PPRangeSkippingCallbacks(PreprocessorSkippedMappings &BufferMappings)
+      : BufferMappings(BufferMappings) {}
+
+  Optional getSkippedRangeForExcludedConditionalBlock(
+      Preprocessor &PP, SourceLocation HashLoc,
+      unsigned CurLexerBufferOffset) override;
+
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override;
+
+private:
+  PreprocessorSkippedMappings &BufferMappings;
+  /// Per-file bookkeeping record.
+  struct FileInfo {
+    SourceLocation Loc;
+    FileID File;
+    const SkippedRangeMapping *PPSkippedRanges = nullptr;
+
+    FileInfo(SourceLocation Loc) : Loc(Loc) {}
+
+    /// True once a valid FileID has been assigned to File.
+    bool isValid() const { return File.isValid(); }
+  };
+  // NOTE(review): presumably one entry per file currently being
+  // preprocessed, maintained by FileChanged() -- confirm in the .cpp.
+  llvm::SmallVector FileStack;
+};
+
+} // end namespace scan_deps
+} // end namespace clang
+
+#endif // LLVM_CLANG_SCAN_DEPS_PP_RANGE_SKIPPING_H
Index: lib/Basic/FileManager.cpp
===================================================================
--- lib/Basic/FileManager.cpp
+++ lib/Basic/FileManager.cpp
@@ -448,15 +448,20 @@
   }
 
   // Otherwise, open the file.
- - if (FileSystemOpts.WorkingDir.empty()) - return FS->getBufferForFile(Filename, FileSize, + llvm::ErrorOr> Result = nullptr; + if (FileSystemOpts.WorkingDir.empty()) { + Result = FS->getBufferForFile(Filename, FileSize, /*RequiresNullTerminator=*/true, isVolatile); - - SmallString<128> FilePath(Entry->getName()); - FixupRelativePath(FilePath); - return FS->getBufferForFile(FilePath, FileSize, - /*RequiresNullTerminator=*/true, isVolatile); + } else { + SmallString<128> FilePath(Entry->getName()); + FixupRelativePath(FilePath); + Result = FS->getBufferForFile(FilePath, FileSize, + /*RequiresNullTerminator=*/true, isVolatile); + } + // The VFS might have overriden this. + if (Result) + const_cast(Entry)->Size = (*Result)->getBufferSize(); + return Result; } llvm::ErrorOr> Index: lib/Basic/VirtualFileSystem.cpp =================================================================== --- lib/Basic/VirtualFileSystem.cpp +++ lib/Basic/VirtualFileSystem.cpp @@ -87,6 +87,12 @@ In.permissions()); } +Status Status::copyWithNewSize(const Status &In, uint64_t Size) { + return Status(In.getName(), In.getUniqueID(), In.getLastModificationTime(), + In.getUser(), In.getGroup(), Size, In.getType(), + In.getPermissions()); +} + bool Status::equivalent(const Status &Other) const { assert(isStatusKnown() && Other.isStatusKnown()); return getUniqueID() == Other.getUniqueID(); Index: lib/Frontend/CompilerInstance.cpp =================================================================== --- lib/Frontend/CompilerInstance.cpp +++ lib/Frontend/CompilerInstance.cpp @@ -77,6 +77,16 @@ Invocation = std::move(Value); } +void CompilerInstance::setAdditionalPPCallbacks( + std::unique_ptr PPC) { + AdditionalPPCallbacks = std::move(PPC); +} + +void CompilerInstance::setOverridenDepFileGenerator( + std::unique_ptr Gen) { + OverridenDepFileGenerator = std::move(Gen); +} + bool CompilerInstance::shouldBuildGlobalModuleIndex() const { return (BuildGlobalModuleIndex || (ModuleManager && 
ModuleManager->isGlobalIndexUnavailable() && @@ -427,7 +437,8 @@ const DependencyOutputOptions &DepOpts = getDependencyOutputOpts(); if (!DepOpts.OutputFile.empty()) TheDependencyFileGenerator.reset( - DependencyFileGenerator::CreateAndAttachToPreprocessor(*PP, DepOpts)); + DependencyFileGenerator::CreateAndAttachToPreprocessor( + *PP, DepOpts, OverridenDepFileGenerator.get())); if (!DepOpts.DOTOutputFile.empty()) AttachDependencyGraphGen(*PP, DepOpts.DOTOutputFile, getHeaderSearchOpts().Sysroot); @@ -468,6 +479,9 @@ /*ShowAllHeaders=*/true, /*OutputPath=*/"", /*ShowDepth=*/true, /*MSStyle=*/true); } + + if (AdditionalPPCallbacks) + PP->addPPCallbacks(std::move(AdditionalPPCallbacks)); } std::string CompilerInstance::getSpecificModuleCachePath() { Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -3010,11 +3010,19 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, const char *const *ArgBegin, const char *const *ArgEnd, - DiagnosticsEngine &Diags) { + DiagnosticsEngine &Diags, + llvm::opt::OptTable *Options) { bool Success = true; // Parse the arguments. 
- std::unique_ptr Opts = createDriverOptTable(); + std::unique_ptr OwnedOpts; + llvm::opt::OptTable *Opts = Options; + if (!Options) { + OwnedOpts = createDriverOptTable(); + Opts = OwnedOpts.get(); + } else { + Opts = Options; + } const unsigned IncludedFlagsBitmask = options::CC1Option; unsigned MissingArgIndex, MissingArgCount; InputArgList Args = Index: lib/Frontend/DependencyFile.cpp =================================================================== --- lib/Frontend/DependencyFile.cpp +++ lib/Frontend/DependencyFile.cpp @@ -164,6 +164,7 @@ bool IncludeModuleFiles; DependencyOutputFormat OutputFormat; unsigned InputFileIndex; + clang::DependencyFileGeneratorConsumer *Consumer; private: bool FileMatchesDepCriteria(const char *Filename, @@ -171,7 +172,8 @@ void OutputDependencyFile(); public: - DFGImpl(const Preprocessor *_PP, const DependencyOutputOptions &Opts) + DFGImpl(const Preprocessor *_PP, const DependencyOutputOptions &Opts, + clang::DependencyFileGeneratorConsumer *Consumer) : PP(_PP), OutputFile(Opts.OutputFile), Targets(Opts.Targets), IncludeSystemHeaders(Opts.IncludeSystemHeaders), PhonyTarget(Opts.UsePhonyTargets), @@ -179,7 +181,7 @@ SeenMissingHeader(false), IncludeModuleFiles(Opts.IncludeModuleFiles), OutputFormat(Opts.OutputFormat), - InputFileIndex(0) { + InputFileIndex(0), Consumer(Consumer) { for (const auto &ExtraDep : Opts.ExtraDeps) { if (AddFilename(ExtraDep)) ++InputFileIndex; @@ -244,7 +246,8 @@ : Impl(Impl) { } DependencyFileGenerator *DependencyFileGenerator::CreateAndAttachToPreprocessor( - clang::Preprocessor &PP, const clang::DependencyOutputOptions &Opts) { + clang::Preprocessor &PP, const clang::DependencyOutputOptions &Opts, + clang::DependencyFileGeneratorConsumer *Consumer) { if (Opts.Targets.empty()) { PP.getDiagnostics().Report(diag::err_fe_dependency_file_requires_MT); @@ -255,7 +258,7 @@ if (Opts.AddMissingHeaderDeps) PP.SetSuppressIncludeNotFoundError(true); - DFGImpl *Callback = new DFGImpl(&PP, Opts); + DFGImpl 
*Callback = new DFGImpl(&PP, Opts, Consumer); PP.addPPCallbacks(std::unique_ptr(Callback)); PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks( llvm::make_unique(*Callback)); @@ -435,6 +438,11 @@ llvm::sys::fs::remove(OutputFile); return; } + if (Consumer) { + for (const auto &Filename : Files) + Consumer->sawDependency(Filename); + return; + } std::error_code EC; llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::F_Text); Index: lib/Lex/CMakeLists.txt =================================================================== --- lib/Lex/CMakeLists.txt +++ lib/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangLex + FilterToIncludes.cpp HeaderMap.cpp HeaderSearch.cpp Lexer.cpp Index: lib/Lex/FilterToIncludes.cpp =================================================================== --- /dev/null +++ lib/Lex/FilterToIncludes.cpp @@ -0,0 +1,743 @@ +//===- FilterToIncludes.cpp - Filter down to just includes ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for filtering header and source files to the +/// minimum necessary for evaluating includes. It leaves behind #define, +/// #include, #import, @import, and any conditional preprocessor logic that +/// contains one of those. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/CharInfo.h"
+#include "clang/Lex/FilterToIncludes.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::filter_to_includes;
+
+namespace {
+
+/// Scans raw source bytes and emits the filtered text into Out, recording one
+/// Token per emitted directive in Tokens.
+///
+/// The member functions that return bool follow the file's convention of
+/// returning true on error and false on success.
+struct Lexer {
+  /// Destination for the filtered text.
+  SmallVectorImpl &Out;
+  /// Destination for the directive tokens; each token's offset indexes Out.
+  SmallVectorImpl &Tokens;
+
+  Lexer(SmallVectorImpl &Out, SmallVectorImpl &Tokens)
+      : Out(Out), Tokens(Tokens) {}
+
+  bool lex(StringRef Bytes);
+
+  /// Owns the spelling of identifiers that were split across lines by a
+  /// line-continuation, so that the StringRef handed out by lexIdentifier()
+  /// stays valid.
+  StringMap SplitIds;
+
+private:
+
+  /// Result of lexing an identifier: one past its last character in the
+  /// input, and its spelling.
+  struct IdInfo {
+    const char *Last;
+    StringRef Name;
+  };
+
+  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
+  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
+                                       const char *const End);
+  LLVM_NODISCARD bool lexImpl(const char *First, const char *const End);
+  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
+                                 const char *&First, const char *const End);
+  /// Records a token of kind \p K that starts at the current end of Out.
+  Token &makeToken(TokenKind K) {
+    Tokens.emplace_back(K, Out.size());
+    return Tokens.back();
+  }
+  /// Drops the most recent token together with everything that was written
+  /// to Out since it was created. Requires Tokens to be non-empty.
+  void popToken() {
+    Out.resize(Tokens.back().Offset);
+    Tokens.pop_back();
+  }
+  /// Kind of the most recent token, or pp_none when there is none.
+  TokenKind top() const { return Tokens.empty() ? 
pp_none : Tokens.back().K; }
+
+  /// Appends a single byte to the output; returns *this for chaining.
+  Lexer &put(char Byte) {
+    Out.push_back(Byte);
+    return *this;
+  }
+  /// Appends a string / byte range to the output; returns *this for chaining.
+  Lexer &append(StringRef S) { return append(S.begin(), S.end()); }
+  Lexer &append(const char *First, const char *Last) {
+    Out.append(First, Last);
+    return *this;
+  }
+
+  void printToNewline(const char *&First, const char *const End);
+  void printAdjacentModuleNameParts(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printAtImportBody(const char *&First,
+                                        const char *const End);
+  void printDirectiveBody(const char *&First, const char *const End);
+  void printAdjacentMacroArgs(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
+};
+
+} // end namespace
+
+/// Advances \p First past any horizontal whitespace, stopping at \p End.
+static void skipOverSpaces(const char *&First, const char *const End) {
+  while (First != End && isHorizontalWhitespace(*First))
+    ++First;
+}
+
+/// Returns true if the quote at \p Current opens a raw string literal, i.e. it
+/// is a '"' immediately preceded by one of the prefixes R, uR, UR, LR or u8R,
+/// and that prefix is not the tail of a longer identifier.
+///
+/// \param First Start of the text that may be inspected; nothing before it
+/// is read.
+/// \param Current Position of the quote character being classified.
+LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
+                                              const char *Current) {
+  assert(First <= Current);
+
+  // Check if we can even back up.
+  if (*Current != '\"' || First == Current)
+    return false;
+
+  // Check for an "R".
+  --Current;
+  if (*Current != 'R')
+    return false;
+  if (First == Current || !isIdentifierBody(*--Current))
+    return true;
+
+  // Check for a prefix of "u", "U", or "L".
+  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
+    return First == Current || !isIdentifierBody(*--Current);
+
+  // Check for a prefix of "u8". Current is on the '8' here, so step back
+  // *before* comparing against 'u'; a post-decrement would compare the '8'
+  // itself and reject every u8R"..." literal.
+  if (*Current != '8' || First == Current || *--Current != 'u')
+    return false;
+  return First == Current || !isIdentifierBody(*--Current);
+}
+
+/// Advances \p First past the raw string literal whose opening quote it
+/// points at; gives up at \p End if the literal is unterminated.
+static void skipRawString(const char *&First, const char *const End) {
+  assert(First[0] == '\"');
+  assert(First[-1] == 'R');
+
+  const char *Last = ++First;
+  while (Last != End && *Last != '(')
+    ++Last;
+  if (Last == End) {
+    First = Last; // Hit the end... just give up.
+    return;
+  }
+
+  // Everything between the opening quote and the '(' is the delimiter that
+  // must reappear between the closing ')' and '"'.
+  StringRef Terminator(First, Last - First);
+  for (;;) {
+    // Move First to just past the next ")".
+    First = Last;
+    while (First != End && *First != ')')
+      ++First;
+    if (First == End)
+      return;
+    ++First;
+
+    // Look ahead for the terminator sequence.
+    Last = First;
+    while (Last != End && size_t(Last - First) < Terminator.size() &&
+           Terminator[Last - First] == *Last)
+      ++Last;
+
+    // Check if we hit it (or the end of the file).
+    if (Last == End) {
+      First = Last;
+      return;
+    }
+    if (size_t(Last - First) < Terminator.size())
+      continue;
+    if (*Last != '\"')
+      continue;
+    First = Last + 1;
+    return;
+  }
+}
+
+/// Advances \p First past the string or character literal whose opening
+/// quote it points at. A backslash skips the character that follows it. Stops
+/// at \p End if the literal is unterminated.
+static void skipString(const char *&First, const char *const End) {
+  assert(*First == '\'' || *First == '\"');
+  const char Terminator = *First;
+  for (++First; First != End && *First != Terminator; ++First)
+    if (*First == '\\')
+      if (++First == End)
+        return;
+  if (First != End)
+    ++First; // Finish off the string.
+}
+
+/// Advances \p First past one line ending. A pair of two *different*
+/// vertical whitespace characters is consumed as a single line ending.
+static void skipNewline(const char *&First, const char *End) {
+  assert(isVerticalWhitespace(*First));
+  ++First;
+  if (First == End)
+    return;
+
+  // Check for "\n\r" and "\r\n".
+  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
+    ++First;
+}
+
+/// Advances \p First to the next vertical whitespace character, without
+/// interpreting strings or comments. After a backslash that directly precedes
+/// the line ending, one more character is consumed and the scan resumes.
+static void skipToNewlineRaw(const char *&First, const char *const End) {
+  for (;;) {
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First))
+      return;
+
+    while (!isVerticalWhitespace(*First))
+      if (++First == End)
+        return;
+
+    if (First[-1] != '\\')
+      return;
+
+    ++First; // Keep going...
+  }
+}
+
+/// Returns \p Last moved backwards over any horizontal whitespace that
+/// directly precedes it, never going before \p First.
+static const char *reverseOverSpaces(const char *First, const char *Last) {
+  while (First != Last && isHorizontalWhitespace(Last[-1]))
+    --Last;
+  return Last;
+}
+
+/// Advances \p First from the "//" it points at to the end of that comment.
+static void skipLineComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '/');
+  First += 2;
+  skipToNewlineRaw(First, End);
+}
+
+/// Advances \p First from the "/*" it points at to just past the closing
+/// "*/", or to \p End if the comment is unterminated.
+static void skipBlockComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '*');
+  // The shortest complete block comment, "/**/", is four bytes long.
+  if (End - First < 4) {
+    First = End;
+    return;
+  }
+  for (First += 3; First != End; ++First)
+    if (First[-1] == '*' && First[0] == '/') {
+      ++First;
+      return;
+    }
+}
+
+/// Advances \p First past the current logical line, including its line
+/// ending.
+///
+/// String literals, raw string literals and comments are skipped as units so
+/// that a newline or comment marker inside them is not misread. A backslash
+/// directly before the line ending continues the logical line.
+static void skipLine(const char *&First, const char *const End) {
+  for (;;) {
+    assert(First <= End);
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      return;
+    }
+    const char *Start = First;
+    while (First != End && !isVerticalWhitespace(*First)) {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*First == '\"' || *First == '\'') {
+        if (isRawStringLiteral(Start, First))
+          skipRawString(First, End);
+        else
+          skipString(First, End);
+        continue;
+      }
+
+      // Iterate over comments correctly.
+      if (*First != '/' || End - First < 2) {
+        ++First;
+        continue;
+      }
+
+      if (First[1] == '/') {
+        // "//...".
+        skipLineComment(First, End);
+        continue;
+      }
+
+      if (First[1] != '*') {
+        ++First;
+        continue;
+      }
+
+      // "/*...*/".
+      skipBlockComment(First, End);
+    }
+    if (First == End)
+      return;
+
+    // Decide whether the line is continued *before* consuming the line
+    // ending: skipNewline() may consume two bytes ("\r\n" or "\n\r"), so
+    // looking a fixed distance back afterwards would miss the backslash.
+    // First[-1] is in bounds because the loop above advanced at least once.
+    assert(isVerticalWhitespace(*First));
+    const bool IsContinued = First[-1] == '\\';
+
+    // Skip over the newline.
+    skipNewline(First, End);
+    if (!IsContinued)
+      return;
+    // Continue past line-continuations.
+  }
+}
+
+void Lexer::printToNewline(const char *&First, const char *const End) {
+  while (First != End && !isVerticalWhitespace(*First)) {
+    const char *Last = First;
+    do {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*Last == '\"' || *Last == '\'') {
+        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
+          skipRawString(Last, End);
+        else
+          skipString(Last, End);
+        continue;
+      }
+      if (*Last != '/' || End - Last < 2) {
+        ++Last;
+        continue; // Gather the rest up to print verbatim.
+      }
+
+      if (Last[1] != '/' && Last[1] != '*') {
+        ++Last;
+        continue;
+      }
+
+      // Deal with "//..." and "/*...*/".
+      append(First, reverseOverSpaces(First, Last));
+      First = Last;
+
+      if (Last[1] == '/') {
+        skipLineComment(First, End);
+        return;
+      }
+
+      // A block comment is replaced by a single space.
+      put(' ');
+      skipBlockComment(First, End);
+      skipOverSpaces(First, End);
+      Last = First;
+    } while (Last != End && !isVerticalWhitespace(*Last));
+
+    // Print out the string.
+    if (Last == End || Last == First || Last[-1] != '\\') {
+      append(First, reverseOverSpaces(First, Last));
+      // Leave First at the line ending (or End), exactly like the
+      // line-comment path above does; otherwise the caller would lex the
+      // text that was just printed a second time.
+      First = Last;
+      return;
+    }
+
+    // Print up to the backslash, backing up over spaces.
+    append(First, reverseOverSpaces(First, Last - 1));
+
+    First = Last;
+    skipNewline(First, End);
+    skipOverSpaces(First, End);
+  }
+}
+
+/// Advances \p First past horizontal whitespace, backslash-newline
+/// continuations and block comments. A line comment is skipped as well, after
+/// which the function returns. Plain line endings are not consumed.
+static void skipWhitespace(const char *&First, const char *const End) {
+  for (;;) {
+    assert(First <= End);
+    skipOverSpaces(First, End);
+
+    // Everything below needs to look at two characters.
+    if (End - First < 2)
+      return;
+
+    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
+      skipNewline(++First, End);
+      continue;
+    }
+
+    // Check for a non-comment character.
+    if (First[0] != '/')
+      return;
+
+    // "// ...".
+    if (First[1] == '/') {
+      skipLineComment(First, End);
+      return;
+    }
+
+    // Cannot be a comment.
+    if (First[1] != '*')
+      return;
+
+    // "/*...*/".
+    skipBlockComment(First, End);
+  }
+}
+
+void Lexer::printAdjacentModuleNameParts(const char *&First,
+                                         const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
+  append(First, Last);
+  First = Last;
+}
+
+/// Prints the module name that follows "@import", up to and including the
+/// terminating ';', followed by a newline.
+///
+/// \returns true on error (end of input before the ';', or a character that
+/// cannot be part of a module name), false on success.
+bool Lexer::printAtImportBody(const char *&First, const char *const End) {
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    // The name may be spread over several lines.
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      continue;
+    }
+
+    // Found a semicolon.
+    if (*First == ';') {
+      put(*First++).put('\n');
+      return false;
+    }
+
+    // Don't handle macro expansions inside @import for now.
+    if (!isIdentifierBody(*First) && *First != '.')
+      return true;
+
+    printAdjacentModuleNameParts(First, End);
+  }
+}
+
+/// Prints the rest of a directive line, removes trailing spaces from the
+/// output and terminates it with a newline.
+void Lexer::printDirectiveBody(const char *&First, const char *const End) {
+  skipWhitespace(First, End); // Skip initial whitespace.
+  printToNewline(First, End);
+  // Calling back() on an empty buffer is invalid, so check for emptiness
+  // before looking at the last byte.
+  while (!Out.empty() && Out.back() == ' ')
+    Out.pop_back();
+  put('\n');
+}
+
+/// Returns one past the last character of the identifier that starts at
+/// \p First. \p First must point at an identifier character.
+LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
+                                                   const char *const End) {
+  assert(isIdentifierBody(*First) && "invalid identifer");
+  const char *Last = First + 1;
+  while (Last != End && isIdentifierBody(*Last))
+    ++Last;
+  return Last;
+}
+
+/// If \p First points at a backslash-newline that is directly followed by an
+/// identifier character, returns the position of that character; otherwise
+/// returns null.
+LLVM_NODISCARD static const char *
+getIdentifierContinuation(const char *First, const char *const End) {
+  // Need at least the backslash, a line ending and one more character.
+  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
+    return nullptr;
+
+  ++First;
+  skipNewline(First, End);
+  if (First == End)
+    return nullptr;
+  return isIdentifierBody(First[0]) ? First : nullptr;
+}
+
+Lexer::IdInfo Lexer::lexIdentifier(const char *First, const char *const End) {
+  const char *Last = lexRawIdentifier(First, End);
+  const char *Next = getIdentifierContinuation(Last, End);
+  if (LLVM_LIKELY(!Next))
+    return IdInfo{Last, StringRef(First, Last - First)};
+
+  // Slow path, where identifiers are split over lines.
+  // Stitch the pieces together in a temporary buffer, then intern the result
+  // in SplitIds so that the returned name does not point into the temporary.
+  SmallVector Id(First, Last);
+  while (Next) {
+    Last = lexRawIdentifier(Next, End);
+    Id.append(Next, Last);
+    Next = getIdentifierContinuation(Last, End);
+  }
+  return IdInfo{
+      Last,
+      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+}
+
+/// Prints the run of identifier characters, '.' and ',' that starts at
+/// \p First (the first character is printed unconditionally) and advances
+/// \p First past it.
+void Lexer::printAdjacentMacroArgs(const char *&First, const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End &&
+         (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
+  append(First, Last);
+  First = Last;
+}
+
+/// Prints a macro parameter list, from the '(' that \p First points at
+/// through the matching ')'; whitespace and comments inside it are dropped.
+///
+/// \returns true on error (end of input, or an unexpected character), false
+/// on success.
+bool Lexer::printMacroArgs(const char *&First, const char *const End) {
+  assert(*First == '(');
+  put(*First++);
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    if (*First == ')') {
+      put(*First++);
+      return false;
+    }
+
+    // This is intentionally fairly liberal.
+    if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+      return true;
+
+    printAdjacentMacroArgs(First, End);
+  }
+}
+
+/// Looks for an identifier starting from First, after skipping whitespace.
+///
+/// Updates "First" to just past the next identifier, if any. Returns true iff
+/// the identifier matches "Id".
+bool Lexer::isNextIdentifier(StringRef Id, const char *&First,
+                             const char *const End) {
+  skipWhitespace(First, End);
+  if (First == End || !isIdentifierHead(*First))
+    return false;
+
+  IdInfo FoundId = lexIdentifier(First, End);
+  First = FoundId.Last;
+  return FoundId.Name == Id;
+}
+
+/// Lexes a line that starts with '@'. Only "@import" is kept; any other line
+/// is skipped.
+bool Lexer::lexAt(const char *&First, const char *const End) {
+  // Handle "@import".
+  ++First;
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+  makeToken(pp_at_import);
+  append("@import ");
+  if (printAtImportBody(First, End))
+    return true; // Error: Could not find semi-colon.
+  skipWhitespace(First, End);
+  if (First == End)
+    return false;
+  if (!isVerticalWhitespace(*First))
+    return true; // Error: Nothing expected after semi-colon.
+
+  skipNewline(First, End);
+  return false;
+}
+
+/// Emits a minimized "#define" directive.
+///
+/// \p First points just past the "define" keyword. The macro name, an optional
+/// parameter list and the replacement text are printed. Returns true (error)
+/// if no macro name follows the keyword, false otherwise. A malformed
+/// parameter list is not an error: it is replaced by a placeholder and the
+/// rest of the line is skipped.
+bool Lexer::lexDefine(const char *&First, const char *const End) {
+  makeToken(pp_define);
+  append("#define ");
+  skipWhitespace(First, End);
+
+  // The macro name is mandatory. lexIdentifier() reads *First unconditionally,
+  // so bail out here instead of reading past the end of the buffer or lexing
+  // a non-identifier character as the name.
+  if (First == End || !isIdentifierHead(*First))
+    return true; // Error: Missing or invalid macro name.
+
+  IdInfo Id = lexIdentifier(First, End);
+  const char *Last = Id.Last;
+  append(Id.Name);
+  if (Last == End)
+    return false;
+  if (*Last == '(') {
+    size_t Size = Out.size();
+    if (printMacroArgs(Last, End)) {
+      // Be robust to bad macro arguments, since they can show up in disabled
+      // code.
+      Out.resize(Size);
+      append("(/* invalid */\n");
+      skipLine(Last, End);
+      return false;
+    }
+  }
+  skipWhitespace(Last, End);
+  if (Last == End)
+    return false;
+  if (!isVerticalWhitespace(*Last))
+    put(' ');
+  printDirectiveBody(Last, End);
+  First = Last;
+  return false;
+}
+
+/// Handles "#pragma"; \p First points just past the "pragma" keyword.
+///
+/// Only "#pragma clang module import" is emitted. Any other pragma is skipped.
+bool Lexer::lexPragma(const char *&First,
+                      const char *const End) {
+  // #pragma.
+  if (!isNextIdentifier("clang", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang.
+  if (!isNextIdentifier("module", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module import.
+  makeToken(pp_pragma_import);
+  append("#pragma clang module import ");
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Lexer::lexEndif(const char *&First, const char *const End) {
+  // Strip out "#else" if it's empty.
+  if (top() == pp_else)
+    popToken();
+
+  // Strip out "#elif" if they're empty.
+  while (top() == pp_elif)
+    popToken();
+
+  // If "#if" is empty, strip it and skip the "#endif".
+  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+    popToken();
+    skipLine(First, End);
+    return false;
+  }
+
+  return lexDefault(pp_endif, "endif", First, End);
+}
+
+/// Emits a directive verbatim: '#', the directive name, a space and the rest
+/// of the line. Always returns false (no error).
+bool Lexer::lexDefault(TokenKind Kind, StringRef Directive, const char *&First,
+                       const char *const End) {
+  makeToken(Kind);
+  put('#').append(Directive).put(' ');
+  printDirectiveBody(First, End);
+  return false;
+}
+
+/// Processes one logical line of the input, starting at \p First.
+///
+/// Lines that start with neither '#' nor '@' are skipped, as are directives
+/// that are not listed in the switch below. Returns true on error.
+bool Lexer::lexPPLine(const char *&First, const char *const End) {
+  assert(First != End);
+
+  skipWhitespace(First, End);
+  assert(First <= End);
+  if (First == End)
+    return false;
+
+  if (*First != '#' && *First != '@') {
+    skipLine(First, End);
+    assert(First <= End);
+    return false;
+  }
+
+  // Handle "@import".
+  if (*First == '@')
+    return lexAt(First, End);
+
+  // Handle preprocessing directives.
+  ++First; // Skip over '#'.
+  skipWhitespace(First, End);
+
+  if (First == End)
+    return true; // Error: Invalid preprocessor directive.
+
+  if (!isIdentifierHead(*First)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // Figure out the token.
+  IdInfo Id = lexIdentifier(First, End);
+  First = Id.Last;
+  // "pragma" is mapped to pp_pragma_import here; lexPragma() decides whether
+  // the pragma really is a module import.
+  auto Kind = llvm::StringSwitch(Id.Name)
+                  .Case("include", pp_include)
+                  .Case("__include_macros", pp___include_macros)
+                  .Case("define", pp_define)
+                  .Case("undef", pp_undef)
+                  .Case("import", pp_import)
+                  .Case("include_next", pp_include_next)
+                  .Case("if", pp_if)
+                  .Case("ifdef", pp_ifdef)
+                  .Case("ifndef", pp_ifndef)
+                  .Case("elif", pp_elif)
+                  .Case("else", pp_else)
+                  .Case("endif", pp_endif)
+                  .Case("pragma", pp_pragma_import)
+                  .Default(pp_none);
+  if (Kind == pp_none) {
+    skipLine(First, End);
+    return false;
+  }
+
+  if (Kind == pp_endif)
+    return lexEndif(First, End);
+
+  if (Kind == pp_define)
+    return lexDefine(First, End);
+
+  if (Kind == pp_pragma_import)
+    return lexPragma(First, End);
+
+  // Everything else.
+  return lexDefault(Kind, Id.Name, First, End);
+}
+
+/// Processes the input line by line and stops at the first error.
+bool Lexer::lexImpl(const char *First, const char *const End) {
+  while (First != End)
+    if (lexPPLine(First, End))
+      return true;
+  return false;
+}
+
+/// Minimizes \p Bytes into the output buffer. Returns true on error.
+bool Lexer::lex(StringRef Bytes) {
+  bool Error = lexImpl(Bytes.begin(), Bytes.end());
+
+  if (!Error) {
+    // Add a trailing newline and an EOF on success.
+    if (!Out.empty() && Out.back() != '\n')
+      Out.push_back('\n');
+    makeToken(pp_eof);
+  }
+
+  // Null-terminate the output. This way the memory buffer that's passed to
+  // Clang will not have to worry about the terminating '\0'.
+  // The '\0' is popped again, so it is not counted in the output's size.
+  Out.push_back(0);
+  Out.pop_back();
+  return Error;
+}
+
+/// Convenience overload for callers that do not need the token list.
+bool clang::filterToIncludes(StringRef Input, SmallVectorImpl &Output) {
+  SmallVector Tokens;
+  return filterToIncludes(Input, Output, Tokens);
+}
+
+/// Clears \p Output and \p Tokens, then minimizes \p Input into them.
+/// Returns true on error.
+bool clang::filterToIncludes(StringRef Input, SmallVectorImpl &Output,
+                             SmallVectorImpl &Tokens) {
+  Output.clear();
+  Tokens.clear();
+  return Lexer(Output, Tokens).lex(Input);
+}
+
+/// Computes the ranges between consecutive conditional directives.
+///
+/// For every elif/else/endif token a range is appended to \p Range that starts
+/// at the offset of the preceding conditional directive and ends at the
+/// offset of the token itself. Returns true if an elif/else/endif is found
+/// without an open conditional; ranges appended before that point are kept.
+bool clang::filter_to_includes::computeSkippedRanges(
+    ArrayRef Input, llvm::SmallVectorImpl &Range) {
+  struct Directive {
+    enum DirectiveKind {
+      If,  // if/ifdef/ifndef
+      Else // elif,else
+    };
+    int Offset;
+    DirectiveKind Kind;
+  };
+  // Stack of the currently open conditional directives.
+  llvm::SmallVector Offsets;
+  for (const Token &T : Input) {
+    switch (T.K) {
+    case pp_if:
+    case pp_ifdef:
+    case pp_ifndef:
+      Offsets.push_back({T.Offset, Directive::If});
+      break;
+
+    case pp_elif:
+    case pp_else: {
+      if (Offsets.empty())
+        return true;
+      int PreviousOffset = Offsets.back().Offset;
+      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
+      Offsets.push_back({T.Offset, Directive::Else});
+      break;
+    }
+
+    case pp_endif: {
+      if (Offsets.empty())
+        return true;
+      int PreviousOffset = Offsets.back().Offset;
+      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
+      // Pop the elif/else entries together with the opening if.
+      do {
+        Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
+        if (Kind == Directive::If)
+          break;
+      } while (!Offsets.empty());
+      break;
+    }
+    default:
+      break;
+ } + } + return false; +} Index: lib/Lex/HeaderSearch.cpp =================================================================== --- lib/Lex/HeaderSearch.cpp +++ lib/Lex/HeaderSearch.cpp @@ -1637,8 +1637,10 @@ return; std::error_code EC; + SmallString<128> Dir = SearchDir.getDir()->getName(); + FileMgr.makeAbsolutePath(Dir); SmallString<128> DirNative; - llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative); + llvm::sys::path::native(Dir, DirNative); vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); for (vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -3126,6 +3126,20 @@ // Note that this doesn't affect IsAtPhysicalStartOfLine. } +bool Lexer::skipOver(unsigned NumBytes) { + IsAtPhysicalStartOfLine = true; + IsAtStartOfLine = true; + if ((BufferPtr + NumBytes) > BufferEnd) + return true; + BufferPtr += NumBytes; + return false; +} + +unsigned Lexer::getCurrentBufferOffset() { + assert(BufferPtr >= BufferStart && "Invalid buffer state"); + return BufferPtr - BufferStart; +} + bool Lexer::Lex(Token &Result) { // Start a new token. Result.startToken(); Index: lib/Lex/PPDirectives.cpp =================================================================== --- lib/Lex/PPDirectives.cpp +++ lib/Lex/PPDirectives.cpp @@ -392,6 +392,13 @@ // disabling warnings, etc. CurPPLexer->LexingRawMode = true; Token Tok; + if (Callbacks && HashTokenLoc.isFileID()) { + if (auto SkipLength = Callbacks->getSkippedRangeForExcludedConditionalBlock( + *this, HashTokenLoc, CurLexer->getCurrentBufferOffset())) { + // Skip to the next '#endif' / '#else' / '#elif'. 
+ CurLexer->skipOver(*SkipLength); + } + } while (true) { CurLexer->Lex(Tok); Index: lib/Tooling/CMakeLists.txt =================================================================== --- lib/Tooling/CMakeLists.txt +++ lib/Tooling/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(Inclusions) add_subdirectory(Refactoring) add_subdirectory(ASTDiff) +add_subdirectory(ScanDeps) add_clang_library(clangTooling AllTUsExecution.cpp Index: lib/Tooling/ScanDeps/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Tooling/ScanDeps/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +add_clang_library(clangScanDeps + DependencyScanner.cpp + OverridenContentsCachingFileSystem.cpp + PPRangeSkipping.cpp + + DEPENDS + ClangDriverOptions + + LINK_LIBS + clangAST + clangBasic + clangCodeGen + clangDriver + clangFrontend + clangFrontendTool + clangLex + clangParse + ) Index: lib/Tooling/ScanDeps/CachedFileContents.h =================================================================== --- /dev/null +++ lib/Tooling/ScanDeps/CachedFileContents.h @@ -0,0 +1,105 @@ +//===- CachedFileContents.h - ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SCAN_DEPS_CACHED_FILE_CONTENTS_H +#define LLVM_CLANG_SCAN_DEPS_CACHED_FILE_CONTENTS_H + +#include "clang/Basic/LLVM.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Tooling/ScanDeps/PPRangeSkipping.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace clang { +namespace scan_deps { + +/// Represents an opened source file whose content was preprocessed to remove +/// any tokens that don't affect the dependency computation. 
+class CachedFileContents {
+public:
+  /// Opens \p Filename through \p ExternalFS and reads its contents.
+  ///
+  /// When \p PP is true the contents are kept unmodified. Otherwise the file
+  /// is minimized, and the original text is used if minimization fails.
+  static llvm::ErrorOr open(StringRef Filename,
+                            bool PP,
+                            vfs::FileSystem &ExternalFS);
+  /// Creates an entry that only holds the status of \p Filename.
+  static llvm::ErrorOr stat(StringRef Filename,
+                            vfs::FileSystem &ExternalFS);
+
+  CachedFileContents(CachedFileContents &&) = default;
+  CachedFileContents &operator=(CachedFileContents &&) = default;
+
+  CachedFileContents(const CachedFileContents &) = delete;
+  CachedFileContents &operator=(const CachedFileContents &) = delete;
+
+  using ContentsStringType = llvm::SmallString<1024>;
+
+  /// Returns the cached contents. Must not be called on a status-only entry.
+  StringRef getContents() const {
+    assert(!HasStatOnly && "no file!");
+    return Contents;
+  }
+  vfs::Status &getStatus() { return Status; }
+  const vfs::Status &getStatus() const { return Status; }
+
+  /// Returns true if the entry was created by stat() and has no contents.
+  bool hasStatusOnly() const { return HasStatOnly; }
+  /// Returns the skipped-range mapping; the pointer is never null.
+  const SkippedRangeMapping *getSkippedRanges() const { return &Mapping; }
+
+  /// Returns an estimate of the number of bytes used by this entry.
+  size_t memoryUsage() const {
+    return sizeof(*this) + Contents.capacity() + Status.getName().size() +
+           Mapping.getMemorySize();
+  }
+
+private:
+  CachedFileContents(ContentsStringType Contents, vfs::Status Status,
+                     bool HasStatOnly, SkippedRangeMapping Mapping)
+      : Contents(std::move(Contents)), Status(std::move(Status)),
+        HasStatOnly(HasStatOnly), Mapping(std::move(Mapping)) {
+    // Null terminate the contents.
+    // The terminator is popped again so that it does not count towards the
+    // size.
+    this->Contents.push_back('\0');
+    this->Contents.pop_back();
+  }
+
+protected:
+  ContentsStringType Contents;
+  vfs::Status Status;
+  bool HasStatOnly;
+  SkippedRangeMapping Mapping;
+};
+
+/// A CachedFileContents with a lock.
+struct ThreadSafeCachedFileContents {
+  std::mutex Lock;
+  /// Empty until the file was opened or stat'ed for the first time.
+  Optional Value;
+};
+
+/// Counters for the file system queries; T is the counter type.
+template struct FileSystemStatistics {
+  T NumStatQueries = {0};
+  T NumGoodStatQueries = {0};
+  T NumOpenQueries = {0};
+  T NumGoodOpenQueries = {0};
+  T NumLocallyCachedOpenQueries = {0};
+};
+
+using ThreadLocalFileSystemStatistics = FileSystemStatistics;
+
+struct SharedFileSystemStatistics
+    : FileSystemStatistics> {
+  /// Adds the counters of \p Other to this object and then resets \p Other.
+  void merge(ThreadLocalFileSystemStatistics &Other) {
+    NumStatQueries += Other.NumStatQueries;
+    NumGoodStatQueries += Other.NumGoodStatQueries;
+    NumOpenQueries += Other.NumOpenQueries;
+    NumGoodOpenQueries += Other.NumGoodOpenQueries;
+    NumLocallyCachedOpenQueries += Other.NumLocallyCachedOpenQueries;
+    // Reset.
+    Other = ThreadLocalFileSystemStatistics();
+  }
+};
+
+} // end namespace scan_deps
+} // end namespace clang
+
+#endif // LLVM_CLANG_SCAN_DEPS_CACHED_FILE_CONTENTS_H
Index: lib/Tooling/ScanDeps/Caches.h
===================================================================
--- /dev/null
+++ lib/Tooling/ScanDeps/Caches.h
@@ -0,0 +1,117 @@
+//===- Caches.h - Global and local cache for cached files -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SCAN_DEPS_CACHES_H
+#define LLVM_CLANG_SCAN_DEPS_CACHES_H
+
+#include "CachedFileContents.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Threading.h"
+#include
+
+namespace clang {
+namespace scan_deps {
+
+/// Each DependencyScannerWorker has a local file cache that allows each worker
+/// thread to cache file accesses locally instead of querying the global cache
+/// every time.
+class LocalFileCache { +public: + void set(StringRef Filename, const CachedFileContents *Entry) { + bool IsInserted = Map.try_emplace(Filename, Entry).second; + (void)IsInserted; + assert(IsInserted && "local cache is updated more than once"); + } + + const CachedFileContents *get(StringRef Filename) { + auto It = Map.find(Filename); + return It == Map.end() ? nullptr : It->getValue(); + } + +private: + llvm::StringMap Map; +}; + +/// The DependencyScannerService has a global cache that is used to cache +/// 'stat' and 'open' calls to the underlying real file system. +/// +/// This cache is sharded based on the hash of the key to reduce the lock +/// contention for the worker threads. +class SharedFileCache { +public: + SharedFileCache() { + // The shared file cache is sharded to reduce lock contention. + // This is reasonable heuristic that produced good results on the 18 core + // iMac Pro. + // 4 threads: 2 shards + // 8 threads: 2 shards + // 12 threads: 3 shards + // 36 threads: 9 shards + NumShards = std::max(16u, std::min(2u, llvm::hardware_concurrency() / 4)); + } + + /// Returns a cache entry for the corresponding key. + /// + /// A new cache entry is created if the key is not in the cache. This is a + /// thread safe call. + ThreadSafeCachedFileContents &get(StringRef Key) { + auto &Shard = Shards[llvm::hash_value(Key) % NumShards]; + std::unique_lock LockGuard(Shard.CacheLock); + auto It = Shard.Cache.try_emplace( + Key, std::unique_ptr()); + auto &Ptr = It.first->getValue(); + // Create the actual cache entry if insert succeeded. + if (It.second) + Ptr = llvm::make_unique(); + return *Ptr; + } + + /// Returns the numbers of shards used by the cache. + /// + /// This call is thread safe. + unsigned getNumShards() const { return NumShards; } + + /// Returns the number of entries in a particular shard. + /// + /// This call is thread safe. 
+ unsigned getShardSize(unsigned Idx) { + assert(Idx < NumShards && "out of bounds index"); + std::unique_lock LockGuard(Shards[Idx].CacheLock); + return Shards[Idx].Cache.size(); + } + + /// Returns the number of bytes used by the shared cache. + /// + /// This call is thread safe. + unsigned getMemoryUsed() { + unsigned BytesUsed = sizeof(*this); + for (unsigned I = 0; I < NumShards; ++I) { + std::unique_lock LockGuard(Shards[I].CacheLock); + for (const auto &Entry : Shards[I].Cache) { + if (Entry.getValue()->Value) + BytesUsed += Entry.getValue()->Value->memoryUsage(); + } + } + return BytesUsed; + } + +private: + struct CacheShard { + std::mutex CacheLock; + llvm::StringMap> Cache; + }; + CacheShard Shards[16]; + unsigned NumShards; +}; + +} // end namespace scan_deps +} // end namespace clang + +#endif // LLVM_CLANG_SCAN_DEPS_CACHES_H Index: lib/Tooling/ScanDeps/DependencyScanner.cpp =================================================================== --- /dev/null +++ lib/Tooling/ScanDeps/DependencyScanner.cpp @@ -0,0 +1,263 @@ +//===- DependencyScanner.cpp - Implements the fast clang-scan-deps --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/ScanDeps/DependencyScanner.h"
+#include "CachedFileContents.h"
+#include "Caches.h"
+#include "OverridenContentsCachingFileSystem.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Driver/Options.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/TextDiagnosticBuffer.h"
+#include "clang/FrontendTool/Utils.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Tooling/ScanDeps/PPRangeSkipping.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang;
+using namespace scan_deps;
+
+namespace clang {
+namespace scan_deps {
+
+/// Each DependencyScannerWorker has a persistent compiler instance that
+/// owns the state that is reused between subsequent compiler invocations.
+class ThreadLocalPersistentCompilerInstance {
+public:
+  ThreadLocalPersistentCompilerInstance(
+      SharedFileCache &Cache, llvm::opt::OptTable &OptTable,
+      SharedFileSystemStatistics &SharedStats);
+
+  IntrusiveRefCntPtr getDiagID() const { return DiagID; }
+
+  /// Returns the file manager, creating it and the caching file system on the
+  /// first call.
+  FileManager *getFileManager();
+
+  /// Runs the "-cc1" command line \p Compilation and returns the files it
+  /// depends on. \p WorkingDirectory is installed as the working directory of
+  /// the file manager for this run.
+  llvm::Expected>
+  getFileDependencies(llvm::ArrayRef Compilation,
+                      StringRef WorkingDirectory);
+
+private:
+  /// The file cache that is handed to the caching file system.
+  SharedFileCache &Cache;
+  /// The option table that is used to parse every command line.
+  llvm::opt::OptTable &OptTable;
+  /// The initial \c DiagnosticIDs are reused between compiler invocations.
+  IntrusiveRefCntPtr DiagID;
+  /// The file system is reused between compiler invocations.
+  IntrusiveRefCntPtr VFS;
+  /// The file manager is reused between compiler invocations.
+  IntrusiveRefCntPtr FM;
+  /// The skipped preprocessor ranges.
+ PreprocessorSkippedMappings PPSkippedRanges; + SharedFileSystemStatistics &SharedFSStats; + /// The module cache directory. + std::string ModuleCacheDirectory; +}; + +class DependencyScannerServiceImpl { +public: + /// The global file system cache. + SharedFileCache GlobalCache; + /// The whole Clang option table is reused by the entire service. + std::unique_ptr GlobalClangOpts; + /// Global file system statistics. + SharedFileSystemStatistics GlobalFSStats; + + DependencyScannerServiceImpl() + : GlobalClangOpts(driver::createDriverOptTable()) {} +}; + +} // end namespace scan_deps +} // end namespace clang + +namespace { + +class DependenciesToVectorCollector : public DependencyFileGeneratorConsumer { +public: + DependenciesToVectorCollector(std::vector &Dependencies) + : Dependencies(Dependencies) {} + void sawDependency(StringRef Filename) override { + Dependencies.push_back(Filename.str()); + } + +private: + std::vector &Dependencies; +}; + +} // end anonymous namespace + +std::atomic ThreadID = {0}; + +ThreadLocalPersistentCompilerInstance::ThreadLocalPersistentCompilerInstance( + SharedFileCache &Cache, llvm::opt::OptTable &OptTable, + SharedFileSystemStatistics &SharedFSStats) + : Cache(Cache), OptTable(OptTable), DiagID(new DiagnosticIDs()), + SharedFSStats(SharedFSStats) {} + +FileManager *ThreadLocalPersistentCompilerInstance::getFileManager() { + if (FM) + return FM.get(); + VFS = new FilterToIncludesCachingFileSystem(Cache, PPSkippedRanges, + vfs::getRealFileSystem()); + FM = new FileManager(FileSystemOptions(), VFS); + return FM.get(); +} + +llvm::Expected> +ThreadLocalPersistentCompilerInstance::getFileDependencies( + llvm::ArrayRef Compilation, StringRef WorkingDirectory) { + std::vector Args; + if (Compilation.size() < 2 || Compilation[1] != "-cc1") { + // FIXME: better error here. 
+    return llvm::errorCodeToError(std::error_code(1, std::generic_category()));
+  }
+  // Drop the executable name and "-cc1"; the remaining arguments are parsed.
+  Args.reserve(Compilation.size() - 2);
+  for (const auto &Arg : Compilation.drop_front(2))
+    Args.push_back(Arg.c_str());
+
+  std::vector Dependencies;
+  std::unique_ptr Clang(new CompilerInstance());
+
+  // Buffer diagnostics from argument parsing so that we can output them using a
+  // well formed diagnostic object.
+  IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions();
+  TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer;
+  DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer);
+  bool Success = CompilerInvocation::CreateFromArgs(
+      Clang->getInvocation(), Args.data(), Args.data() + Args.size(), Diags,
+      &OptTable);
+  Clang->getFrontendOpts().DisableFree = false;
+
+  // Create the actual diagnostics engine.
+  Clang->createDiagnostics();
+  if (!Success)
+    return llvm::errorCodeToError(std::error_code(1, std::generic_category()));
+
+  // Reuse the filemanager.
+  assert(!Clang->hasFileManager() && "file manager provided?");
+  FileManager *FM = getFileManager();
+
+  // Honor the intent of the inputs.
+  // The header search entries of this invocation bypass the cache.
+  VFS->IgnoredFiles.clear();
+  for (const auto &Entry : Clang->getHeaderSearchOpts().UserEntries)
+    VFS->IgnoredFiles.insert(Entry.Path);
+  // FIXME: We have to collect all filenames that were explicitly passed in
+  // the build settings and that might be opened, as we want to ensure we
+  // don't run filter-to-includes on them.
+
+  // Make sure the FSO are updated.
+  FileSystemOptions FSO = Clang->getFileSystemOpts();
+  FSO.WorkingDir = WorkingDirectory;
+  FM->setFileSystemOpts(FSO);
+  // FIXME: We need to support -ivfs overlays too.
+  Clang->setFileManager(FM);
+
+  // FIXME: Reuse the module cache.
+  if (Clang->getLangOpts().Modules) {
+    // Module optimization.
+    // For now use separate cache per lane.
+    // The directory is chosen once and reused by later invocations.
+    if (ModuleCacheDirectory.empty()) {
+      SmallString<128> TempDir;
+      llvm::sys::path::system_temp_directory(true, TempDir);
+      // FIXME: This is not how it should be done at all.
+      llvm::sys::path::append(TempDir, "clang-scan-deps_module.cache");
+      int I = ThreadID.fetch_add(1);
+      llvm::sys::path::append(TempDir, llvm::Twine(I));
+      ModuleCacheDirectory = TempDir.str();
+      // NOTE(review): this writes to the standard output of the process;
+      // confirm that it cannot interleave with the tool's own output.
+      llvm::outs() << "module cache dir " << ModuleCacheDirectory << "\n";
+    }
+    Clang->getHeaderSearchOpts().ModuleCachePath = ModuleCacheDirectory;
+    // The modules don't need the global module index.
+    Clang->getFrontendOpts().UseGlobalModuleIndex = false;
+    Clang->getFrontendOpts().GenerateGlobalModuleIndex = false;
+    // The modules don't need the timestamps.
+    Clang->getFrontendOpts().IncludeTimestamps = false;
+    // The files don't need to be embedded in modules.
+    Clang->getFrontendOpts().ModulesEmbedAllFiles = false;
+    // There's no need to prune the module cache.
+    Clang->getHeaderSearchOpts().ModuleCachePruneInterval = 0;
+    Clang->getHeaderSearchOpts().ModuleCachePruneAfter = 0;
+  }
+
+  // Inject a custom dependency file generator which just collects the files.
+  Clang->setOverridenDepFileGenerator(
+      llvm::make_unique(Dependencies));
+
+  // We don't need a detailed PP record.
+  Clang->getPreprocessorOpts().DetailedRecord = false;
+
+  // Set the PP callbacks which should speed up PP skipping.
+  // The mappings of the previous invocation are dropped first.
+  PPSkippedRanges.reset();
+  Clang->setAdditionalPPCallbacks(
+      llvm::make_unique(PPSkippedRanges));
+
+  // FIXME: Report errors to client.
+  Success = ExecuteCompilerInvocation(Clang.get());
+  if (!Success)
+    return llvm::errorCodeToError(std::error_code(1, std::generic_category()));
+  // Update the shared statistics.
+  // merge() also resets the counters of the file system.
+  SharedFSStats.merge(VFS->getStats());
+  return Dependencies;
+}
+
+DependencyScannerService::DependencyScannerService()
+    : Impl(new DependencyScannerServiceImpl) {}
+DependencyScannerService::~DependencyScannerService() {}
+
+/// Writes the cache and file system statistics to \p OS as one JSON value.
+///
+/// The value has two members: "global-cache" with the shard sizes and the
+/// memory used, and "fs" with the stat and open counters.
+void DependencyScannerService::printStatistics(raw_ostream &OS) const {
+  // Assemble the trace.
+  llvm::json::Array ShardSizes;
+  for (unsigned I = 0; I < Impl->GlobalCache.getNumShards(); ++I)
+    ShardSizes.push_back(Impl->GlobalCache.getShardSize(I));
+  llvm::json::Value GlobalCacheTrace =
+      llvm::json::Object{{"shards", llvm::json::Value(std::move(ShardSizes))},
+                         {"mem-used", Impl->GlobalCache.getMemoryUsed()}};
+  llvm::json::Value FSTrace = llvm::json::Object{
+      {"stats",
+       llvm::json::Object{
+           {"num", llvm::json::Value(
+                       (int64_t)Impl->GlobalFSStats.NumStatQueries.load())},
+           {"num-cacheable",
+            llvm::json::Value(
+                (int64_t)Impl->GlobalFSStats.NumGoodStatQueries.load())}}},
+      {"opens",
+       llvm::json::Object{
+           {"num", llvm::json::Value(
+                       (int64_t)Impl->GlobalFSStats.NumOpenQueries.load())},
+           {"num-cacheable",
+            llvm::json::Value(
+                (int64_t)Impl->GlobalFSStats.NumGoodOpenQueries.load())},
+           {"num-locally-cached",
+            llvm::json::Value((int64_t)Impl->GlobalFSStats
+                                  .NumLocallyCachedOpenQueries.load())}}}};
+
+  auto Trace = llvm::json::Value(
+      llvm::json::Object{{"global-cache", GlobalCacheTrace}, {"fs", FSTrace}});
+  OS << Trace;
+}
+
+/// Creates a worker that shares the cache, the option table and the
+/// statistics of \p Service.
+DependencyScannerWorker::DependencyScannerWorker(
+    const DependencyScannerService &Service)
+    : CI(new ThreadLocalPersistentCompilerInstance(
+          Service.Impl->GlobalCache, *Service.Impl->GlobalClangOpts,
+          Service.Impl->GlobalFSStats)) {}
+
+DependencyScannerWorker::~DependencyScannerWorker() {}
+
+llvm::Expected>
+DependencyScannerWorker::getFileDependencies(ArrayRef Compilation,
+                                             StringRef WorkingDirectory) {
+  return
CI->getFileDependencies(Compilation, WorkingDirectory); +} Index: lib/Tooling/ScanDeps/OverridenContentsCachingFileSystem.h =================================================================== --- /dev/null +++ lib/Tooling/ScanDeps/OverridenContentsCachingFileSystem.h @@ -0,0 +1,68 @@ +//===- OverridenContentsCachingFileSystem.h - clang-scan-deps FS *- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SCAN_DEPS_OVERRIDEN_CONTENTS_CACHING_FS_H +#define LLVM_CLANG_SCAN_DEPS_OVERRIDEN_CONTENTS_CACHING_FS_H + +#include "CachedFileContents.h" +#include "Caches.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "llvm/ADT/StringSet.h" +#include + +namespace clang { +namespace scan_deps { + +class PreprocessorSkippedMappings; + +/// A virtual file system optimized for the dependency discovery service. 
+class FilterToIncludesCachingFileSystem : public vfs::FileSystem {
+public:
+  FilterToIncludesCachingFileSystem(
+      SharedFileCache &SharedCache,
+      PreprocessorSkippedMappings &PPSkippedRanges,
+      IntrusiveRefCntPtr ExternalFS)
+      : SharedCache(SharedCache), PPSkippedRanges(PPSkippedRanges),
+        ExternalFS(std::move(ExternalFS)) {}
+
+  llvm::ErrorOr status(const Twine &Path) override;
+  llvm::ErrorOr>
+  openFileForRead(const Twine &Path) override;
+
+  // The working directory and directory listings are forwarded to the external
+  // file system without caching.
+  llvm::ErrorOr getCurrentWorkingDirectory() const override {
+    return ExternalFS->getCurrentWorkingDirectory();
+  }
+
+  std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
+    return ExternalFS->setCurrentWorkingDirectory(Path);
+  }
+
+  vfs::directory_iterator dir_begin(const Twine &Dir,
+                                    std::error_code &EC) override {
+    return ExternalFS->dir_begin(Dir, EC);
+  }
+
+  /// Returns the query counters of this file system.
+  ThreadLocalFileSystemStatistics &getStats() { return Stats; }
+
+  /// Paths that bypass the cache: status() and openFileForRead() forward them
+  /// to the external file system unchanged.
+  llvm::StringSet<> IgnoredFiles;
+
+private:
+  SharedFileCache &SharedCache;
+  LocalFileCache FileCache;
+  PreprocessorSkippedMappings &PPSkippedRanges;
+  /// The file system to use for external references.
+  IntrusiveRefCntPtr ExternalFS;
+  ThreadLocalFileSystemStatistics Stats;
+};
+
+} // end namespace scan_deps
+} // end namespace clang
+
+#endif // LLVM_CLANG_SCAN_DEPS_OVERRIDEN_CONTENTS_CACHING_FS_H
Index: lib/Tooling/ScanDeps/OverridenContentsCachingFileSystem.cpp
===================================================================
--- /dev/null
+++ lib/Tooling/ScanDeps/OverridenContentsCachingFileSystem.cpp
@@ -0,0 +1,221 @@
+//===- OverridenContentsCachingFileSystem.cpp - clang-scan-deps FS --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OverridenContentsCachingFileSystem.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Frontend/TextDiagnosticBuffer.h"
+#include "clang/Lex/FilterToIncludes.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Tooling/ScanDeps/PPRangeSkipping.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Threading.h"
+
+using namespace clang;
+using namespace scan_deps;
+
+// FIXME: we need to cache the directory listings.
+
+namespace {
+
+/// Returns true if the given file should be cached.
+///
+/// Files with a C, C++ or Objective-C source extension are not cached.
+/// FIXME: We can probably get rid of this.
+static bool shouldCacheFile(StringRef Filename) {
+  // Don't cache the source files.
+  return !(Filename.endswith(".c") || Filename.endswith(".cpp") ||
+           Filename.endswith(".cc") || Filename.endswith(".cxx") ||
+           Filename.endswith(".m") || Filename.endswith(".mm"));
+}
+
+/// Provide a file wrapper with an overridden status.
+class OverridenFileContentsWithFixedStatus : public vfs::File {
+  std::unique_ptr Buffer;
+  vfs::Status S;
+
+public:
+  OverridenFileContentsWithFixedStatus(
+      std::unique_ptr Buffer, vfs::Status S)
+      : Buffer(std::move(Buffer)), S(std::move(S)) {}
+
+  llvm::ErrorOr status() override { return S; }
+
+  /// Returns the wrapped buffer without giving up ownership.
+  const llvm::MemoryBuffer *getBufferPtr() const { return Buffer.get(); }
+
+  /// Hands the wrapped buffer over to the caller; the arguments are ignored.
+  /// The buffer is moved out, so it can only be obtained once.
+  llvm::ErrorOr>
+  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+            bool IsVolatile) override {
+    return std::move(Buffer);
+  }
+
+  std::error_code close() override { return {}; }
+};
+
+} // end anonymous namespace
+
+llvm::ErrorOr
+CachedFileContents::open(StringRef Filename, bool PP, vfs::FileSystem &ExternalFS) {
+  /// Open the file from the external file system.
+  auto ExternalFile = ExternalFS.openFileForRead(Filename);
+  if (!ExternalFile)
+    return ExternalFile.getError();
+  auto ExternalStatus = (*ExternalFile)->status();
+  if (!ExternalStatus)
+    return ExternalStatus.getError();
+  vfs::File &OpenedFile = **ExternalFile;
+  llvm::ErrorOr> Buffer =
+      OpenedFile.getBuffer((*ExternalStatus).getName());
+  if (!Buffer)
+    return Buffer.getError();
+
+  // Preprocess the file by stripping away unnecessary source text.
+  ContentsStringType Contents;
+  llvm::SmallVector Tokens;
+  SkippedRangeMapping Mapping;
+  // When PP is set the file is not minimized at all: the condition
+  // short-circuits and the original contents are used.
+  if (PP || filterToIncludes((*Buffer)->getBuffer(), Contents, Tokens)) {
+    // Use the original file if preprocessing failed.
+    Contents = (*Buffer)->getBuffer();
+  } else {
+    // Compute the skipped PP ranges that speedup skipping over inactive
+    // preprocessor blocks.
+    llvm::SmallVector SkippedRanges;
+    // NOTE(review): the error result of computeSkippedRanges is ignored, so
+    // the ranges that were collected before an error are still used.
+    filter_to_includes::computeSkippedRanges(Tokens, SkippedRanges);
+    for (const auto &Range : SkippedRanges) {
+      if (Range.Length < 16) {
+        // Ignore small ranges as non-profitable.
+        continue;
+      }
+      Mapping[Range.Offset] = Range.Length;
+    }
+  }
+
+  // The reported file size must match the contents that are handed out.
+  size_t Size = Contents.size();
+  return CachedFileContents(std::move(Contents),
+                            vfs::Status::copyWithNewSize(*ExternalStatus, Size),
+                            /*HasStatOnly=*/false, std::move(Mapping));
+}
+
+llvm::ErrorOr
+CachedFileContents::stat(StringRef Filename, vfs::FileSystem &ExternalFS) {
+  auto ExternalStatus = ExternalFS.status(Filename);
+  if (!ExternalStatus)
+    return ExternalStatus.getError();
+  return CachedFileContents(ContentsStringType(), std::move(*ExternalStatus),
+                            /*HasStatOnly=*/true, SkippedRangeMapping());
+}
+
+/// Returns the shared cache entry for \p Filename, filling it on first use.
+///
+/// A status-only entry is replaced by a full entry when the contents are
+/// requested. On failure nullptr is returned and \p Err is set; the failure
+/// is not cached.
+///
+/// NOTE(review): the returned pointer is used by the callers after the entry
+/// lock is released, while a later call may assign a new value to the same
+/// entry. Confirm that this cannot race.
+static const CachedFileContents *openFileForRead(SharedFileCache &GlobalCache,
+                                                 vfs::FileSystem &ExternalFS,
+                                                 StringRef Filename,
+                                                 std::error_code &Err,
+                                                 bool StatOnly = false) {
+  ThreadSafeCachedFileContents &CacheEntry = GlobalCache.get(Filename);
+  std::unique_lock LockGuard(CacheEntry.Lock);
+  if (!CacheEntry.Value || (!StatOnly && CacheEntry.Value->hasStatusOnly())) {
+    auto Value = StatOnly ? CachedFileContents::stat(Filename, ExternalFS)
+                          : CachedFileContents::open(Filename, false, ExternalFS);
+    if (!Value) {
+      Err = Value.getError();
+      return nullptr;
+    }
+    CacheEntry.Value = std::move(*Value);
+  }
+  return CacheEntry.Value.getPointer();
+}
+
+llvm::ErrorOr
+FilterToIncludesCachingFileSystem::status(const Twine &Path) {
+  // Avoid a copy when the path is already a single string.
+  std::string OwnedFilename;
+  StringRef Filename;
+  if (Path.isSingleStringRef()) {
+    Filename = Path.getSingleStringRef();
+  } else {
+    OwnedFilename = Path.str();
+    Filename = OwnedFilename;
+  }
+
+  // FIXME: We could still cache these stat calls.
+  Stats.NumStatQueries++;
+  if (IgnoredFiles.count(Filename))
+    return ExternalFS->status(Path);
+
+  if (!shouldCacheFile(Filename))
+    return ExternalFS->status(Path);
+  Stats.NumGoodStatQueries++;
+
+  // Look the status up in the shared cache, adding it if it is missing.
+ std::error_code Err; + if (const auto *Entry = ::openFileForRead(SharedCache, *ExternalFS, Filename, + Err, /*StatOnly=*/true)) + return Entry->getStatus(); + return Err; +} + +/// Creates a new file entry. +static std::unique_ptr +createFile(const CachedFileContents &File, + PreprocessorSkippedMappings &BufferMappings) { + const vfs::Status &Status = File.getStatus(); + auto Result = llvm::make_unique( + llvm::MemoryBuffer::getMemBuffer(File.getContents(), Status.getName(), + /*RequiresNullTerminator=*/false), + Status); + if (!File.getSkippedRanges()->empty()) + BufferMappings.setSkippedRanges(Result->getBufferPtr(), + File.getSkippedRanges()); + return Result; +} + +llvm::ErrorOr> +FilterToIncludesCachingFileSystem::openFileForRead(const Twine &Path) { + std::string OwnedFilename; + StringRef Filename; + if (Path.isSingleStringRef()) { + Filename = Path.getSingleStringRef(); + } else { + OwnedFilename = Path.str(); + Filename = OwnedFilename; + } + + // FIXME: Better modules support! + Stats.NumOpenQueries++; + if (IgnoredFiles.count(Filename) || Filename.endswith(".pcm")) + return ExternalFS->openFileForRead(Path); + + bool PP = Filename.endswith(".modulemap") || + Filename.endswith(".module") || + Filename.endswith("module.map"); + if (!shouldCacheFile(Filename) || PP) { + // FIXME: We don't need the skipped mapping! + auto E = CachedFileContents::open(Filename, PP, *ExternalFS); + if (!E) + return E.getError(); + CachedFileContents &Value = *E; + // Create a copy of the buffer since the contents is not cached. + auto Buf = llvm::MemoryBuffer::getMemBufferCopy( + Value.getContents(), Value.getStatus().getName()); + return llvm::make_unique( + std::move(Buf), std::move(Value.getStatus())); + } + Stats.NumGoodOpenQueries++; + + // Check if we have cached this file in the local cache. 
+ if (const auto *Entry = FileCache.get(Filename)) { + ++Stats.NumLocallyCachedOpenQueries; + return createFile(*Entry, PPSkippedRanges); + } + std::error_code Err; + if (const auto *Entry = + ::openFileForRead(SharedCache, *ExternalFS, Filename, Err)) { + FileCache.set(Filename, Entry); + return createFile(*Entry, PPSkippedRanges); + } + return Err; +} Index: lib/Tooling/ScanDeps/PPRangeSkipping.cpp =================================================================== --- /dev/null +++ lib/Tooling/ScanDeps/PPRangeSkipping.cpp @@ -0,0 +1,76 @@ +//===- PPRangeSkipping.cpp - PP Callbacks to skip inactive range ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/ScanDeps/PPRangeSkipping.h" +#include "clang/Lex/Preprocessor.h" + +using namespace clang; +using namespace scan_deps; + +Optional +PPRangeSkippingCallbacks::getSkippedRangeForExcludedConditionalBlock( + Preprocessor &PP, SourceLocation HashLoc, unsigned CurLexerBufferOffset) { + if (FileStack.empty()) + return None; + const SourceManager &SM = PP.getSourceManager(); + + // Retrieve the information about the file we're currently lexing. + auto &Info = FileStack.back(); + if (!Info.isValid()) { + std::pair F = SM.getDecomposedLoc(Info.Loc); + Info.File = F.first; + const auto *Buf = SM.getBuffer(F.first); + if (const auto *PPSkippedRanges = BufferMappings.getSkippedRanges(Buf)) { + Info.PPSkippedRanges = PPSkippedRanges; + } + } + + if (!Info.PPSkippedRanges) + return None; + + auto HashFileOffset = SM.getDecomposedLoc(HashLoc); + if (HashFileOffset.first != Info.File) + return None; + // Check if the offset of '#' is mapped in the skipped ranges. 
+ auto It = Info.PPSkippedRanges->find(HashFileOffset.second); + if (It == Info.PPSkippedRanges->end()) { +#if 0 + // FIXME: Scan-deps: investigate and fix the remaining unskipped PP ranges + llvm::errs() << "Missing ... " << SM.getBuffer(Info.File)->getBufferIdentifier() << "\n"; + HashLoc.dump(SM); llvm::errs()<< "\n"; +#endif + return None; + } + unsigned BytesToSkip = It->getSecond(); + assert(CurLexerBufferOffset >= HashFileOffset.second && + "lexer is before the hash?"); + // Take into account the fact that the lexer has already advanced, so the + // number of bytes to skip must be adjusted. + unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; + assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); + return BytesToSkip - LengthDiff; +} + +void PPRangeSkippingCallbacks::FileChanged(SourceLocation Loc, + FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) { + switch (Reason) { + case EnterFile: + if (Loc.isValid()) + FileStack.push_back(Loc); + break; + case ExitFile: + if (!FileStack.empty()) + FileStack.pop_back(); + break; + default: + break; + } +} Index: test/CMakeLists.txt =================================================================== --- test/CMakeLists.txt +++ test/CMakeLists.txt @@ -56,6 +56,8 @@ clang-refactor clang-diff hmaptool + clang-scan-deps + clang-filter-to-includes ) if(CLANG_ENABLE_STATIC_ANALYZER) Index: test/ClangScanDeps/Inputs/simple_cdb.json =================================================================== --- /dev/null +++ test/ClangScanDeps/Inputs/simple_cdb.json @@ -0,0 +1,7 @@ +[ +{ + "directory": "DIR", + "command": "clang -cc1 -Eonly -o /dev/null -dependency-file foo -MT deps DIR/simple.cpp -IInputs", + "file": "DIR/simple.cpp" +} +] Index: test/ClangScanDeps/simple-c-api.cpp =================================================================== --- /dev/null +++ test/ClangScanDeps/simple-c-api.cpp @@ -0,0 +1,7 @@ +// RUN: c-index-test core -scan-deps %S -- 
clang_tool -cc1 -Eonly -o /dev/null -dependency-file %t -MT deps %s -I %S/Inputs | FileCheck %s + +#include "header.h" + +// CHECK: dependencies: +// CHECK-NEXT: simple-c-api.cpp +// CHECK-NEXT: Inputs/header.h Index: test/ClangScanDeps/simple.cpp =================================================================== --- /dev/null +++ test/ClangScanDeps/simple.cpp @@ -0,0 +1,14 @@ +// RUN: rm -rf %t.dir +// RUN: rm -rf %t.cdb +// RUN: mkdir -p %t.dir +// RUN: cp %s %t.dir/simple.cpp +// RUN: mkdir %t.dir/Inputs +// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h +// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/simple_cdb.json > %t.cdb +// RUN: clang-scan-deps -compilation-database %t.cdb -print-dependencies -j 1 | FileCheck %s + +#include "header.h" + +// CHECK: DEPENDENCIES +// CHECK-NEXT: simple.cpp +// CHECK-NEXT: Inputs/header.h Index: test/FilterToIncludes/simple.cpp =================================================================== --- /dev/null +++ test/FilterToIncludes/simple.cpp @@ -0,0 +1,15 @@ +// RUN: clang-filter-to-includes %s | grep -v CHECK | FileCheck %s + +#ifdef A +void missing(); +#include "header.h" +#else +void foo(); +#endif +// hello +#define B + +// CHECK: #ifdef A +// CHECK-NEXT: #include "header.h" +// CHECK-NEXT: #endif +// CHECK-NEXT: #define B Index: tools/CMakeLists.txt =================================================================== --- tools/CMakeLists.txt +++ tools/CMakeLists.txt @@ -8,6 +8,8 @@ add_clang_subdirectory(clang-fuzzer) add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-offload-bundler) +add_clang_subdirectory(clang-filter-to-includes) +add_clang_subdirectory(clang-scan-deps) add_clang_subdirectory(c-index-test) Index: tools/clang-filter-to-includes/CMakeLists.txt =================================================================== --- /dev/null +++ tools/clang-filter-to-includes/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS support) + +add_clang_tool(clang-filter-to-includes + 
ClangFilterToIncludes.cpp + ) + +set(CLANG_FILTER_TO_INCLUDES_LIB_DEPS + clangBasic + clangLex + ) + +target_link_libraries(clang-filter-to-includes + PRIVATE + ${CLANG_FILTER_TO_INCLUDES_LIB_DEPS} + ) Index: tools/clang-filter-to-includes/ClangFilterToIncludes.cpp =================================================================== --- /dev/null +++ tools/clang-filter-to-includes/ClangFilterToIncludes.cpp @@ -0,0 +1,62 @@ +//===-- clang-filter-to-includes/ClangFilterToIncludes.cpp ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements clang-filter-to-includes. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/FilterToIncludes.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::filter_to_includes; + +static bool filter(StringRef Filename, SmallVectorImpl &Out, + SmallVectorImpl &Tokens) { + ErrorOr> CodeOrErr = + MemoryBuffer::getFileOrSTDIN(Filename, /* FileSize = */ -1, + /* RequiresNullTerminator = */ false); + if (std::error_code EC = CodeOrErr.getError()) { + errs() << EC.message() << "\n"; + return true; + } + std::unique_ptr Code = std::move(CodeOrErr.get()); + if (Code->getBufferSize() == 0) + return false; // Nothing to filter in an empty file; not an error. + + StringRef AssumedFilename = Filename == "-" ? "/dev/stdin" : Filename; + //llvm::errs() << "filename = " << AssumedFilename << "\n"; + bool Error = filterToIncludes( + StringRef(Code->getBufferStart(), Code->getBufferSize()), Out, Tokens); + + if (Error) + llvm::errs() << "error: could not ppd-tokenize '" << AssumedFilename << "'\n"; + + // Emit partial results.
+ llvm::outs() << StringRef(Out.begin(), Out.size()); + + return Error; +} + +int main(int, const char **argv) { + SmallVector Out; + SmallVector Tokens; + + ++argv; + if (!*argv) + return filter("-", Out, Tokens) ? 1 : 0; + + for (; *argv; ++argv) + if (filter(*argv, Out, Tokens)) + return 1; + + return 0; +} Index: tools/clang-scan-deps/CMakeLists.txt =================================================================== --- /dev/null +++ tools/clang-scan-deps/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_LINK_COMPONENTS + Core + Support +) + +add_clang_tool(clang-scan-deps + ClangScanDeps.cpp + ) + +set(CLANG_SCAN_DEPS_LIB_DEPS + clangAST + clangBasic + clangCodeGen + clangDriver + clangFrontend + clangFrontendTool + clangLex + clangParse + clangTooling + clangScanDeps + ) + +target_link_libraries(clang-scan-deps + PRIVATE + ${CLANG_SCAN_DEPS_LIB_DEPS} + ) + Index: tools/clang-scan-deps/ClangScanDeps.cpp =================================================================== --- /dev/null +++ tools/clang-scan-deps/ClangScanDeps.cpp @@ -0,0 +1,364 @@ +//===-- ClangScanDeps.cpp - Implementation of clang-scan-deps -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/JSONCompilationDatabase.h" +#include "clang/Tooling/ScanDeps/DependencyScanner.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Options.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ThreadLocal.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/Timer.h" +#include + +// FIXME: All of this is a hack from libTooling for dirty compilation command -> +// CC1 command conversion.
+#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/Job.h" +#include "clang/Driver/Options.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" +#include "clang/Frontend/ASTUnit.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Frontend/FrontendOptions.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Lex/HeaderSearchOptions.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/ArgumentsAdjusters.h" +#include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" + +using namespace clang; +using namespace scan_deps; +using namespace clang::tooling; + +static llvm::cl::opt + CDBFile("compilation-database", llvm::cl::Required, + llvm::cl::desc("Path to the compilation database file")); + +enum DependencyScannerType { MDOnly, Eonly }; + +static llvm::cl::opt DependencyScanner( + "scanner", + llvm::cl::desc("Which tool should be used to compute the dependency"), + llvm::cl::values( + clEnumValN( + MDOnly, "fast-deps", + "The set of dependencies is computed by using scanDeps library"), + clEnumValN(Eonly, "pp", + "The set of dependencies is computed by running -Eonly " + "equivalent")), + llvm::cl::init(MDOnly)); + +static llvm::cl::opt + NumThreads("j", llvm::cl::Optional, + llvm::cl::desc("Number of worker threads to use"), + llvm::cl::init(0)); + +static llvm::cl::opt +CDBLimit("compilation-limit", llvm::cl::Optional, + llvm::cl::desc("Limit the number of compilation entries to use"), + llvm::cl::init(0)); + +static llvm::cl::opt SaveDepsTo( + "write-dep-files-to", llvm::cl::Optional, + llvm::cl::desc( + "The directory to which the dependency files should be saved")); + +static llvm::cl::opt + PrintDeps("print-dependencies", 
llvm::cl::Optional, + llvm::cl::desc("print out the dependencies to stdout")); + +namespace { + +struct CompilationRequest { + std::vector Compilation; + std::string WorkingDirectory; +}; + +/// Feeds the worker threads with the compilation queue. +class WorkQueue { +public: + WorkQueue(std::vector CompilationRequests) + : CompilationRequests(std::move(CompilationRequests)) {} + + Optional getCompilation() { + std::unique_lock LockGuard(Lock); + if (Index >= CompilationRequests.size()) + return None; + return std::move(CompilationRequests[Index++]); + } + +private: + std::vector CompilationRequests; + size_t Index = 0; + std::mutex Lock; +}; + +class ScanDepsTool { +public: + virtual ~ScanDepsTool() {} + + virtual void computeDependencies(unsigned WorkerIdx, + const CompilationRequest &Request) = 0; + + virtual std::string getStatistics() const { return ""; } +}; + +/// This tool invokes the fast dependency scanner. +class DependencyScannerTool : public ScanDepsTool { +public: + DependencyScannerTool(unsigned NumWorkers) { + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(llvm::make_unique(Service)); + } + + void computeDependencies(unsigned WorkerIdx, + const CompilationRequest &Request) override { + auto Result = Workers[WorkerIdx]->getFileDependencies( + Request.Compilation, Request.WorkingDirectory); + if (!Result) { + llvm::logAllUnhandledErrors(Result.takeError(), llvm::errs(), "error: "); + return; + } + if (PrintDeps) { + llvm::outs() << "DEPENDENCIES\n"; + for (const auto &Entry : *Result) + llvm::outs() << Entry << "\n"; + } + } + + virtual std::string getStatistics() const override { + std::string Str; + llvm::raw_string_ostream OS(Str); + Service.printStatistics(OS); + return OS.str(); + } + +private: + DependencyScannerService Service; + std::vector> Workers; +}; + +/// This tool invokes the -Eonly mode. 
+class EonlyTool : public ScanDepsTool { +public: + EonlyTool(const char *Argv0) { + // Exists solely for the purpose of lookup of the executable path. + static int StaticSymbol; + // Compute the clang binary path that we can invoke. + std::string ExecutablePath = + llvm::sys::fs::getMainExecutable(Argv0, &StaticSymbol); + SmallString<256> Directory = llvm::sys::path::parent_path(ExecutablePath); + llvm::sys::path::append(Directory, "clang"); + ClangBinaryPath = Directory.str(); + } + + void computeDependencies(unsigned WorkerIdx, + const CompilationRequest &Request) override { + std::vector Args; + for (const auto &A : Request.Compilation) + Args.push_back(A); + Args.push_back("-working-directory"); + Args.push_back(Request.WorkingDirectory); + int R = llvm::sys::ExecuteAndWait(ClangBinaryPath, Args); + (void)R; + return; + } + +private: + std::string ClangBinaryPath; +}; + +/// Builds a clang driver initialized for running clang tools. +static driver::Driver *newDriver( + DiagnosticsEngine *Diagnostics, const char *BinaryName, + IntrusiveRefCntPtr VFS) { + driver::Driver *CompilerDriver = + new driver::Driver(BinaryName, llvm::sys::getDefaultTargetTriple(), + *Diagnostics, std::move(VFS)); + CompilerDriver->setTitle("clang_based_tool"); + return CompilerDriver; +} + +/// Retrieves the clang CC1 specific flags out of the compilation's jobs. +/// +/// Returns nullptr on error. +static const llvm::opt::ArgStringList *getCC1Arguments( + DiagnosticsEngine *Diagnostics, driver::Compilation *Compilation) { + // We expect to get back exactly one Command job, if we didn't something + // failed. Extract that job from the Compilation. 
+ const driver::JobList &Jobs = Compilation->getJobs(); + if (Jobs.size() != 1 || !isa(*Jobs.begin())) { + SmallString<256> error_msg; + llvm::raw_svector_ostream error_stream(error_msg); + Jobs.Print(error_stream, "; ", true); + Diagnostics->Report(diag::err_fe_expected_compiler_job) + << error_stream.str(); + return nullptr; + } + + // The one job we find should be to invoke clang again. + const auto &Cmd = cast(*Jobs.begin()); + if (StringRef(Cmd.getCreator().getName()) != "clang") { + Diagnostics->Report(diag::err_fe_expected_clang_command); + return nullptr; + } + + return &Cmd.getArguments(); +} + +// FIXME: This code is copied from libTooling; share it. +bool convertCDB(ArrayRef CommandLine, + llvm::function_ref CB, + DiagnosticConsumer *DiagConsumer = nullptr) { + std::vector Argv; + for (const auto &Str : CommandLine) + Argv.push_back(Str.c_str()); + const char *const BinaryName = Argv[0]; + llvm::IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + unsigned MissingArgIndex, MissingArgCount; + std::unique_ptr Opts = driver::createDriverOptTable(); + llvm::opt::InputArgList ParsedArgs = Opts->ParseArgs( + ArrayRef(Argv).slice(1), MissingArgIndex, MissingArgCount); + ParseDiagnosticArgs(*DiagOpts, ParsedArgs); + TextDiagnosticPrinter DiagnosticPrinter( + llvm::errs(), &*DiagOpts); + DiagnosticsEngine Diagnostics( + IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, + DiagConsumer ? DiagConsumer : &DiagnosticPrinter, false); + //auto VFS = vfs::getRealFileSystem(); + //VFS->setCurrentWorkingDirectory(WorkingDirectory); + const std::unique_ptr Driver( + newDriver(&Diagnostics, BinaryName, nullptr)); + // Since the input might only be virtual, don't check whether it exists.
+ Driver->setCheckInputsExist(false); + const std::unique_ptr Compilation( + Driver->BuildCompilation(llvm::makeArrayRef(Argv))); + if (!Compilation) + return true; + const llvm::opt::ArgStringList *const CC1Args = getCC1Arguments( + &Diagnostics, Compilation.get()); + if (!CC1Args) + return true; + return CB(*CC1Args); +} + +} // end anonymous namespace + +int main(int argc, const char **argv) { + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::cl::ParseCommandLineOptions(argc, argv, "Dependency scanner tool"); + + std::string ErrorMessage; + auto CompilationDB = JSONCompilationDatabase::loadFromFile( + CDBFile, ErrorMessage, JSONCommandLineSyntax::AutoDetect); + if (!CompilationDB) { + llvm::errs() << "error: failed to load compilation database: " + << ErrorMessage << "\n"; + return 1; + } + + std::vector Compilations; + auto CDB = CompilationDB->getAllCompileCommands(); + // FIXME: Reset / set CWD. + for (const auto &Command : + llvm::enumerate(llvm::makeArrayRef(CDB).take_front(CDBLimit != 0 ? CDBLimit : CDB.size()))) { + auto CommandLine = Command.value().CommandLine; + if (CommandLine.size() < 2) { + llvm::errs() << "error: invalid command line!\n"; + return 1; + } + if (CommandLine[1] != "-cc1") { + + CommandLine.push_back("-resource-dir=" + + CompilerInvocation::GetResourcesPath(argv[0], (void *)&convertCDB)); + + // Construct a -cc1 invocation. 
+ std::vector OverridenCommandLine; + OverridenCommandLine.push_back(CommandLine[0]); + if (convertCDB(CommandLine,[&](const llvm::opt::ArgStringList &CC1Args) { + for (const auto &Arg : llvm::makeArrayRef(CC1Args.data(), CC1Args.data() + CC1Args.size())) + OverridenCommandLine.push_back(Arg); + return false; + })) { + llvm::errs() << "error: invalid driver invocation!\n"; + return 1; + } + CommandLine = std::move(OverridenCommandLine); + } + CommandLine.push_back("-Eonly"); + CommandLine.push_back("-o"); + CommandLine.push_back("/dev/null"); + CommandLine.push_back("-sys-header-deps"); + CommandLine.push_back("-dependency-file"); + CommandLine.push_back("temp_file"); + CommandLine.push_back("-MT"); + CommandLine.push_back("deps"); + CommandLine.push_back("-fmodule-format=raw"); + if (!SaveDepsTo.empty()) { + CommandLine.push_back("-dependency-file"); + llvm::SmallString<128> Path(SaveDepsTo.begin(), SaveDepsTo.end()); + llvm::sys::path::append(Path, Twine(Command.index()) + ".d"); + CommandLine.push_back(Path.str()); + } + Compilations.push_back( + CompilationRequest{std::move(CommandLine), Command.value().Directory}); + } + + unsigned NumWorkers = + NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads; + llvm::outs() << "Running " << Compilations.size() << " over " << NumWorkers + << " workers\n"; + WorkQueue Tasks(std::move(Compilations)); + + std::unique_ptr Tool = + DependencyScanner == MDOnly + ? 
std::unique_ptr( + llvm::make_unique(NumWorkers)) + : llvm::make_unique(argv[0]); + std::vector Workers; + llvm::Timer DepScanTimer("dep-scan", "Dependency scanning time"); + { + llvm::TimeRegion Region(DepScanTimer); + for (unsigned I = 0; I < NumWorkers; ++I) { + Workers.emplace_back([&Tool, &Tasks, I]() { + while (true) { + auto Compilation = Tasks.getCompilation(); + if (!Compilation) + return; + Tool->computeDependencies(I, *Compilation); + } + }); + } + for (auto &W : Workers) + W.join(); + } + auto TR = DepScanTimer.getTotalTime(); + + llvm::outs() << "STATISTICS\n" << Tool->getStatistics() << "\n"; + + llvm::json::Value Report = llvm::json::Object{ + {"time", llvm::json::Object{{"wall", TR.getWallTime()}, + {"user", TR.getUserTime()}, + {"sys", TR.getSystemTime()}}}}; + llvm::outs() << "TIMINGS\n" << Report; + DepScanTimer.clear(); + return 0; +} Index: unittests/Lex/CMakeLists.txt =================================================================== --- unittests/Lex/CMakeLists.txt +++ unittests/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ ) add_clang_unittest(LexTests + FilterToIncludesTest.cpp HeaderMapTest.cpp HeaderSearchTest.cpp LexerTest.cpp Index: unittests/Lex/FilterToIncludesTest.cpp =================================================================== --- /dev/null +++ unittests/Lex/FilterToIncludesTest.cpp @@ -0,0 +1,442 @@ +//===- unittests/Lex/FilterToIncludesTest.cpp - filterToIncludes --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===--------------------------------------------------------------===// + +#include "clang/Lex/FilterToIncludes.h" +#include "llvm/ADT/SmallString.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; +using namespace clang::filter_to_includes; + +namespace { + +TEST(FilterToIncludesTest, Empty) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE(filterToIncludes("", Out, Tokens)); + EXPECT_TRUE(Out.empty()); + ASSERT_EQ(1u, Tokens.size()); + ASSERT_EQ(pp_eof, Tokens.back().K); + + ASSERT_FALSE(filterToIncludes("abc def\nxyz", Out, Tokens)); + EXPECT_TRUE(Out.empty()); + ASSERT_EQ(1u, Tokens.size()); + ASSERT_EQ(pp_eof, Tokens.back().K); +} + +TEST(FilterToIncludesTest, AllTokens) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE(filterToIncludes("#define A\n" + "#undef A\n" + "#endif\n" + "#if A\n" + "#ifdef A\n" + "#ifndef A\n" + "#elif A\n" + "#else\n" + "#include \n" + "#include_next \n" + "#__include_macros \n" + "#import \n" + "@import A;\n" + "#pragma clang module import A\n", + Out, Tokens)); + EXPECT_EQ(pp_define, Tokens[0].K); + EXPECT_EQ(pp_undef, Tokens[1].K); + EXPECT_EQ(pp_endif, Tokens[2].K); + EXPECT_EQ(pp_if, Tokens[3].K); + EXPECT_EQ(pp_ifdef, Tokens[4].K); + EXPECT_EQ(pp_ifndef, Tokens[5].K); + EXPECT_EQ(pp_elif, Tokens[6].K); + EXPECT_EQ(pp_else, Tokens[7].K); + EXPECT_EQ(pp_include, Tokens[8].K); + EXPECT_EQ(pp_include_next, Tokens[9].K); + EXPECT_EQ(pp___include_macros, Tokens[10].K); + EXPECT_EQ(pp_import, Tokens[11].K); + EXPECT_EQ(pp_at_import, Tokens[12].K); + EXPECT_EQ(pp_pragma_import, Tokens[13].K); + EXPECT_EQ(pp_eof, Tokens[14].K); +} + +TEST(FilterToIncludesTest, Define) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE(filterToIncludes("#define MACRO", Out, Tokens)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + ASSERT_EQ(2u, Tokens.size()); + ASSERT_EQ(pp_define, Tokens.front().K); +} + +TEST(FilterToIncludesTest, DefineSpacing) { + SmallVector Out; + + 
ASSERT_FALSE(filterToIncludes("#define MACRO\n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO \n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO a \n\n\n", Out)); + EXPECT_STREQ("#define MACRO a\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO\n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineMacroArguments) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO()", Out)); + EXPECT_STREQ("#define MACRO()\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(a, b...)", Out)); + EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO content", Out)); + EXPECT_STREQ("#define MACRO content\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO con tent ", Out)); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO() con tent ", Out)); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineInvalidMacroArguments) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO((a))", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(a * b)", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineHorizontalWhitespace) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO(\t)\tcon \t tent\t", Out)); + EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(\f)\fcon \f tent\f", Out)); + EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(\v)\vcon \v 
tent\v", Out)); + EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + + ASSERT_FALSE( + filterToIncludes("#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out)); + EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineMultilineArgs) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO(a \\\n" + " )", + Out)); + EXPECT_STREQ("#define MACRO(a)\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define MACRO(a, \\\n" + " b) \\\n" + " call((a), \\\n" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineMultilineArgsCarriageReturn) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO(a, \\\r" + " b) \\\r" + " call((a), \\\r" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineMultilineArgsCarriageReturnNewline) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO(a, \\\r\n" + " b) \\\r\n" + " call((a), \\\r\n" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + +TEST(FilterToIncludesTest, DefineMultilineArgsNewlineCarriageReturn) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO(a, \\\n\r" + " b) \\\n\r" + " call((a), \\\n\r" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + +TEST(FilterToIncludesTest, MultilineComment) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO a/*\n" + " /*\n" + "#define MISSING abc\n" + " /*\n" + " /* something */ \n" + "#include /* \"def\" */ \n", + Out)); + EXPECT_STREQ("#define MACRO a\n" + "#include \n", + Out.data()); +} + +TEST(FilterToIncludesTest, MultilineCommentInStrings) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#define MACRO1 \"/*\"\n" + "#define MACRO2 \"*/\"\n", + Out)); + EXPECT_STREQ("#define MACRO1 \"/*\"\n" + "#define MACRO2 \"*/\"\n", + Out.data()); 
+} + +TEST(FilterToIncludesTest, Ifdef) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#ifdef A\n" + "#define B\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifdef A\n" + "#define B\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(filterToIncludes("#ifdef A\n" + "#define B\n" + "#elif B\n" + "#define C\n" + "#elif C\n" + "#define D\n" + "#else\n" + "#define E\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifdef A\n" + "#define B\n" + "#elif B\n" + "#define C\n" + "#elif C\n" + "#define D\n" + "#else\n" + "#define E\n" + "#endif\n", + Out.data()); +} + +TEST(FilterToIncludesTest, EmptyIfdef) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#ifdef A\n" + "#elif B\n" + "#elif C\n" + "#else D\n" + "#endif\n", + Out)); + EXPECT_STREQ("", Out.data()); +} + +TEST(FilterToIncludesTest, Pragma) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#pragma A\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(filterToIncludes("#pragma clang\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(filterToIncludes("#pragma clang module\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(filterToIncludes("#pragma clang module impor\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(filterToIncludes("#pragma clang module import\n", Out)); + EXPECT_STREQ("#pragma clang module import\n", Out.data()); +} + +TEST(FilterToIncludesTest, Include) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#include \"A\"\n", Out)); + EXPECT_STREQ("#include \"A\"\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#include \n", Out)); + EXPECT_STREQ("#include \n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#include_next \n", Out)); + EXPECT_STREQ("#include_next \n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#import \n", Out)); + EXPECT_STREQ("#import \n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#__include_macros \n", Out)); + EXPECT_STREQ("#__include_macros \n", Out.data()); +} + +TEST(FilterToIncludesTest, AtImport) { + SmallVector 
Out; + + ASSERT_FALSE(filterToIncludes("@import A;\n", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(filterToIncludes(" @ import A;\n", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("@import A\n;", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("@import A.B;\n", Out)); + EXPECT_STREQ("@import A.B;\n", Out.data()); + + ASSERT_FALSE(filterToIncludes( + "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out)); + EXPECT_STREQ("@import A.B;\n", Out.data()); +} + +TEST(FilterToIncludesTest, AtImportFailures) { + SmallVector Out; + + ASSERT_TRUE(filterToIncludes("@import A\n", Out)); + ASSERT_TRUE(filterToIncludes("@import MACRO(A);\n", Out)); + ASSERT_TRUE(filterToIncludes("@import \" \";\n", Out)); +} + +TEST(FilterToIncludesTest, RawStringLiteral) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#ifndef GUARD\n" + "#define GUARD\n" + "R\"()\"\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(filterToIncludes( + "#ifndef GUARD\n" + "#define GUARD\n" + R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw" + "\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(filterToIncludes( + "#ifndef GUARD\n" + "#define GUARD\n" + R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw" + "\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); +} + +TEST(FilterToIncludes, SkippedPPRangesBasic) { + SmallString<128> Out; + SmallVector Toks; + StringRef Source = "#ifndef GUARD\n" + "#define GUARD\n" + "void foo();\n" + "#endif\n"; + ASSERT_FALSE(filterToIncludes(Source, Out, Toks)); + SmallVector Ranges; + ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0].Offset, 0); + 
EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); +} + +TEST(FilterToIncludes, SkippedPPRangesNested) { + SmallString<128> Out; + SmallVector Toks; + StringRef Source = "#ifndef GUARD\n" + "#define GUARD\n" + "#if FOO\n" + "#include hello\n" + "#elif BAR\n" + "#include bye\n" + "#endif\n" + "#else\n" + "#include nothing\n" + "#endif\n"; + ASSERT_FALSE(filterToIncludes(Source, Out, Toks)); + SmallVector Ranges; + ASSERT_FALSE(computeSkippedRanges(Toks, Ranges)); + EXPECT_EQ(Ranges.size(), 4u); + EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); + EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); + EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR")); + EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif")); + EXPECT_EQ(Ranges[2].Offset, 0); + EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else")); + EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else")); + EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif")); +} + +TEST(FilterToIncludesTest, SplitIdentifier) { + SmallVector Out; + + ASSERT_FALSE(filterToIncludes("#if\\\n" + "ndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(filterToIncludes("#define GUA\\\n" + "RD\n", + Out)); + EXPECT_STREQ("#define GUARD\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define GUA\\\r" + "RD\n", + Out)); + EXPECT_STREQ("#define GUARD\n", Out.data()); + + ASSERT_FALSE(filterToIncludes("#define GUA\\\n" + " RD\n", + Out)); + EXPECT_STREQ("#define GUA RD\n", Out.data()); +} + +} // end namespace