diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h --- a/clang/include/clang/Lex/DependencyDirectivesScanner.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -19,15 +19,41 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" namespace clang { +namespace tok { +enum TokenKind : unsigned short; +} + class DiagnosticsEngine; namespace dependency_directives_scan { +/// Token lexed as part of dependency directive scanning. +struct Token { + /// Offset into the original source input. + unsigned Offset; + unsigned Length; + tok::TokenKind Kind; + unsigned short Flags; + + Token(unsigned Offset, unsigned Length, tok::TokenKind Kind, + unsigned short Flags) + : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {} + + unsigned getEnd() const { return Offset + Length; } + + bool is(tok::TokenKind K) const { return Kind == K; } + bool isNot(tok::TokenKind K) const { return Kind != K; } + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + return is(K1) || is(K2); + } + template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const { + return is(K1) || isOneOf(Ks...); + } +}; + /// Represents the kind of preprocessor directive or a module declaration that /// is tracked by the scanner in its token output. enum DirectiveKind : uint8_t { @@ -52,9 +78,10 @@ pp_else, pp_endif, decl_at_import, - cxx_export_decl, cxx_module_decl, cxx_import_decl, + cxx_export_module_decl, + cxx_export_import_decl, pp_eof, }; @@ -62,53 +89,48 @@ /// scanning. It's used to track various preprocessor directives that could /// potentially have an effect on the depedencies. struct Directive { + ArrayRef<Token> Tokens; + /// The kind of token. DirectiveKind Kind = pp_none; - /// Offset into the output byte stream of where the directive begins. 
- int Offset = -1; - - Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {} -}; - -/// Simplified token range to track the range of a potentially skippable PP -/// directive. -struct SkippedRange { - /// Offset into the output byte stream of where the skipped directive begins. - int Offset; - - /// The number of bytes that can be skipped before the preprocessing must - /// resume. - int Length; + Directive() = default; + Directive(DirectiveKind K, ArrayRef<Token> Tokens) + : Tokens(Tokens), Kind(K) {} }; -/// Computes the potential source ranges that can be skipped by the preprocessor -/// when skipping a directive like #if, #ifdef or #elsif. -/// -/// \returns false on success, true on error. -bool computeSkippedRanges(ArrayRef<Directive> Input, - llvm::SmallVectorImpl<SkippedRange> &Range); - } // end namespace dependency_directives_scan -/// Minimize the input down to the preprocessor directives that might have +/// Scan the input for the preprocessor directives that might have /// an effect on the dependencies for a compilation unit. /// -/// This function deletes all non-preprocessor code, and strips anything that -/// can't affect what gets included. It canonicalizes whitespace where -/// convenient to stabilize the output against formatting changes in the input. -/// -/// Clears the output vectors at the beginning of the call. +/// This function ignores all non-preprocessor code and anything that +/// can't affect what gets included. /// /// \returns false on success, true on error. If the diagnostic engine is not /// null, an appropriate error is reported using the given input location -/// with the offset that corresponds to the minimizer's current buffer offset. +/// with the offset that corresponds to the \p Input buffer offset. 
bool scanSourceForDependencyDirectives( - llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output, - llvm::SmallVectorImpl<dependency_directives_scan::Directive> &Directives, + StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, + SmallVectorImpl<dependency_directives_scan::Directive> &Directives, DiagnosticsEngine *Diags = nullptr, SourceLocation InputSourceLoc = SourceLocation()); +/// Print the previously scanned dependency directives as minimized source text. +/// +/// \param Source The original source text that the dependency directives were +/// scanned from. +/// \param Directives The previously scanned dependency +/// directives. +/// \param OS the stream to print the dependency directives on. +/// +/// This is used primarily for testing purposes, during dependency scanning the +/// \p Lexer uses the tokens directly, not their printed version. +void printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef<dependency_directives_scan::Directive> Directives, + llvm::raw_ostream &OS); + } // end namespace clang #endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -16,6 +16,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/PreprocessorLexer.h" #include "clang/Lex/Token.h" #include "llvm/ADT/Optional.h" @@ -149,6 +150,13 @@ // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState; + /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer(). + ArrayRef<dependency_directives_scan::Directive> DepDirectives; + + /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the + /// next token to use from the current dependency directive. + unsigned NextDepDirectiveTokenIndex = 0; + void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); public: @@ -195,6 +203,23 @@ /// return the tok::eof token. 
This implicitly involves the preprocessor. bool Lex(Token &Result); + /// Called when the preprocessor is in 'dependency scanning lexing mode'. + bool LexDependencyDirectiveToken(Token &Result); + + /// Called when the preprocessor is in 'dependency scanning lexing mode' and + /// is skipping a conditional block. + bool LexDependencyDirectiveTokenWhileSkipping(Token &Result); + + /// True when the preprocessor is in 'dependency scanning lexing mode' and + /// created this \p Lexer for lexing a set of dependency directive tokens. + bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); } + + /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to + /// the position just after the token. + /// \returns the buffer pointer at the beginning of the token. + const char *convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result); + public: /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. bool isPragmaLexer() const { return Is_PragmaLexer; } @@ -288,14 +313,8 @@ return BufferPtr - BufferStart; } - /// Skip over \p NumBytes bytes. - /// - /// If the skip is successful, the next token will be lexed from the new - /// offset. The lexer also assumes that we skipped to the start of the line. - /// - /// \returns true if the skip failed (new offset would have been past the - /// end of the buffer), false otherwise. - bool skipOver(unsigned NumBytes); + /// Set the lexer's buffer pointer to \p Offset. + void seek(unsigned Offset, bool IsAtStartOfLine); /// Stringify - Convert the specified string into a C string by i) escaping /// '\\' and " characters and ii) replacing newline character(s) with "\\n". 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,7 +29,6 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -558,6 +557,7 @@ CLK_Lexer, CLK_TokenLexer, CLK_CachingLexer, + CLK_DependencyDirectivesLexer, CLK_LexAfterModuleImport } CurLexerKind = CLK_Lexer; @@ -2595,14 +2595,6 @@ void emitMacroDeprecationWarning(const Token &Identifier) const; void emitRestrictExpansionWarning(const Token &Identifier) const; void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; - - Optional - getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); - - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. - ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings; }; /// Abstract base class that describes a handler that will receive diff --git a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h deleted file mode 100644 --- a/clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H -#define LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace clang { - -/// A mapping from an offset into a buffer to the number of bytes that can be -/// skipped by the preprocessor when skipping over excluded conditional -/// directive ranges. -using PreprocessorSkippedRangeMapping = llvm::DenseMap; - -/// The datastructure that holds the mapping between the active memory buffers -/// and the individual skip mappings. -using ExcludedPreprocessorDirectiveSkipMapping = - llvm::DenseMap; - -} // end namespace clang - -#endif // LLVM_CLANG_LEX_PREPROCESSOREXCLUDEDCONDITIONALDIRECTIVESKIPMAPPING_H diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -10,8 +10,9 @@ #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_ #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/FileEntry.h" #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include @@ -200,13 +201,18 @@ /// build it again. std::shared_ptr FailedModules; - /// Contains the currently active skipped range mappings for skipping excluded - /// conditional directives. + /// Function for getting the dependency preprocessor directives of a file. /// - /// The pointer is passed to the Preprocessor when it's constructed. The - /// pointer is unowned, the client is responsible for its lifetime. 
- ExcludedPreprocessorDirectiveSkipMapping - *ExcludedConditionalDirectiveSkipMappings = nullptr; + /// These are directives derived from a special form of lexing where the + /// source input is scanned for the preprocessor directives that might have an + /// effect on the dependencies for a compilation unit. + /// + /// Enables a client to cache the directives for a file and provide them + /// across multiple compiler invocations. + /// FIXME: Allow returning an error. + std::function>( + FileEntryRef)> + DependencyDirectivesForFile; /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -10,7 +10,7 @@ #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H #include "clang/Basic/LLVM.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" @@ -22,24 +22,26 @@ namespace tooling { namespace dependencies { -/// Original and minimized contents of a cached file entry. Single instance can +using DependencyDirectivesTy = + SmallVector; + +/// Contents and directive tokens of a cached file entry. Single instance can /// be shared between multiple entries. struct CachedFileContents { - CachedFileContents(std::unique_ptr Original) - : Original(std::move(Original)), MinimizedAccess(nullptr) {} + CachedFileContents(std::unique_ptr Contents) + : Original(std::move(Contents)), DepDirectives(nullptr) {} /// Owning storage for the original contents. 
 std::unique_ptr<llvm::MemoryBuffer> Original; /// The mutex that must be locked before mutating directive tokens. std::mutex ValueLock; - /// Owning storage for the minimized contents. - std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage; + SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; /// Accessor to the directive tokens that's atomic to avoid data races. - std::atomic<llvm::MemoryBuffer *> MinimizedAccess; - /// Skipped range mapping of the minimized contents. - /// This is initialized iff `MinimizedAccess != nullptr`. - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + /// \p CachedFileContents has ownership of the pointer. + std::atomic<const Optional<DependencyDirectivesTy> *> DepDirectives; + + ~CachedFileContents() { delete DepDirectives.load(); } }; /// An in-memory representation of a file system entity that is of interest to @@ -86,13 +88,17 @@ /// \returns The scanned preprocessor directive tokens of the file that are /// used to speed up preprocessing, if available. - StringRef getDirectiveTokens() const { + Optional<ArrayRef<dependency_directives_scan::Directive>> + getDirectiveTokens() const { assert(!isError() && "error"); - assert(!MaybeStat->isDirectory() && "not a file"); + assert(!isDirectory() && "not a file"); assert(Contents && "contents not initialized"); - llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); - assert(Buffer && "not minimized"); - return Buffer->getBuffer(); + if (auto *Directives = Contents->DepDirectives.load()) { + if (Directives->hasValue()) + return ArrayRef<dependency_directives_scan::Directive>( + Directives->getValue()); + } + return None; } /// \returns The error. @@ -111,15 +117,6 @@ return MaybeStat->getUniqueID(); } - /// \returns The mapping between location -> distance that is used to speed up - /// the block skipping in the preprocessor. - const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { - assert(!isError() && "error"); - assert(!isDirectory() && "not a file"); - assert(Contents && "contents not initialized"); - return Contents->PPSkippedRangeMapping; - } - /// \returns The data structure holding both contents and directive tokens. 
CachedFileContents *getCachedContents() const { assert(!isError() && "error"); @@ -237,10 +234,6 @@ /// If the underlying entry is an opened file, this wrapper returns the file /// contents and the scanned preprocessor directives. class EntryRef { - /// For entry that is an opened file, this bit signifies whether its contents - /// are minimized. - bool Minimized; - /// The filename used to access this entry. std::string Filename; @@ -248,8 +241,8 @@ const CachedFileSystemEntry &Entry; public: - EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry) - : Minimized(Minimized), Filename(Name), Entry(Entry) {} + EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) + : Filename(Name), Entry(Entry) {} llvm::vfs::Status getStatus() const { llvm::vfs::Status Stat = Entry.getStatus(); @@ -268,12 +261,11 @@ return *this; } - StringRef getContents() const { - return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents(); - } + StringRef getContents() const { return Entry.getOriginalContents(); } - const PreprocessorSkippedRangeMapping *getPPSkippedRangeMapping() const { - return Minimized ? &Entry.getPPSkippedRangeMapping() : nullptr; + Optional> + getDirectiveTokens() const { + return Entry.getDirectiveTokens(); } }; @@ -290,24 +282,13 @@ public: DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, - IntrusiveRefCntPtr FS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), - PPSkipMappings(PPSkipMappings) {} + IntrusiveRefCntPtr FS) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} llvm::ErrorOr status(const Twine &Path) override; llvm::ErrorOr> openFileForRead(const Twine &Path) override; - /// Disable directives scanning of the given file. - void disableDirectivesScanning(StringRef Filename); - /// Enable directives scanning of all files. 
- void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); } - -private: - /// Check whether the file should be scanned for preprocessor directives. - bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID); - /// Returns entry for the given filename. /// /// Attempts to use the local and shared caches first, then falls back to @@ -316,6 +297,10 @@ getOrCreateFileSystemEntry(StringRef Filename, bool DisableDirectivesScanning = false); +private: + /// Check whether the file should be scanned for preprocessor directives. + bool shouldScanForDirectives(StringRef Filename); + /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the /// shared cache indexed by unique ID, or creates new entry from scratch. @@ -396,12 +381,6 @@ /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. DependencyScanningFilesystemLocalCache LocalCache; - /// The mapping structure which records information about the - /// excluded conditional directive skip mappings that are used by the - /// currently active preprocessor. - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; - /// The set of files that should not be scanned for PP directives. 
- llvm::DenseSet NotToBeScanned; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -13,7 +13,6 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "llvm/Support/Error.h" @@ -69,7 +68,6 @@ private: std::shared_ptr PCHContainerOps; - ExcludedPreprocessorDirectiveSkipMapping PPSkipMappings; /// The physical filesystem overlaid by `InMemoryFS`. llvm::IntrusiveRefCntPtr RealFS; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -1157,10 +1157,10 @@ SourceManager &SM = CI.getPreprocessor().getSourceManager(); llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID()); - llvm::SmallString<1024> Output; + llvm::SmallVector Tokens; llvm::SmallVector Directives; if (scanSourceForDependencyDirectives( - FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(), + FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) { assert(CI.getDiagnostics().hasErrorOccurred() && "no errors reported for failure"); @@ -1179,7 +1179,8 @@ } return; } - llvm::outs() << Output; + printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives, + llvm::outs()); } void GetDependenciesByModuleNameAction::ExecuteAction() { diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp 
b/clang/lib/Lex/DependencyDirectivesScanner.cpp --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -18,92 +18,148 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/MemoryBuffer.h" -using namespace llvm; using namespace clang; using namespace clang::dependency_directives_scan; +using namespace llvm; namespace { -struct Scanner { - /// Minimized output. - SmallVectorImpl &Out; - /// The known tokens encountered during the minimization. - SmallVectorImpl &Directives; +struct DirectiveWithTokens { + DirectiveKind Kind; + unsigned NumTokens; - Scanner(SmallVectorImpl &Out, SmallVectorImpl &Directives, - StringRef Input, DiagnosticsEngine *Diags, - SourceLocation InputSourceLoc) - : Out(Out), Directives(Directives), Input(Input), Diags(Diags), - InputSourceLoc(InputSourceLoc) {} + DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) + : Kind(Kind), NumTokens(NumTokens) {} +}; + +/// Does an efficient "scan" of the sources to detect the presence of +/// preprocessor (or module import) directives and collects the raw lexed tokens +/// for those directives so that the \p Lexer can "replay" them when the file is +/// included. +/// +/// Note that the behavior of the raw lexer is affected by the language mode, +/// while at this point we want to do a scan and collect tokens once, +/// irrespective of the language mode that the file will get included in. To +/// compensate for that the \p Lexer, while "replaying", will adjust a token +/// where appropriate, when it could affect the preprocessor's state. 
+/// For example in a directive like +/// +/// \code +/// #if __has_cpp_attribute(clang::fallthrough) +/// \endcode +/// +/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 +/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' +/// while in C++ mode. +struct Scanner { + Scanner(StringRef Input, + SmallVectorImpl &Tokens, + DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) + : Input(Input), Tokens(Tokens), Diags(Diags), + InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), + TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), + Input.end()) {} + + static LangOptions getLangOptsForDepScanning() { + LangOptions LangOpts; + // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. + LangOpts.ObjC = true; + LangOpts.LineComment = true; + return LangOpts; + } /// Lex the provided source and emit the directive tokens. /// /// \returns True on error. - bool scan(); + bool scan(SmallVectorImpl &Directives); private: - struct IdInfo { - const char *Last; - StringRef Name; - }; + /// Lexes next token and advances \p First and the \p Lexer. + LLVM_NODISCARD dependency_directives_scan::Token & + lexToken(const char *&First, const char *const End); + + dependency_directives_scan::Token &lexIncludeFilename(const char *&First, + const char *const End); - /// Lex an identifier. + /// Lexes next token and if it is identifier returns its string, otherwise + /// it skips the current line and returns \p None. /// - /// \pre First points at a valid identifier head. - LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); - LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, - const char *const End); + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. 
+ LLVM_NODISCARD Optional + tryLexIdentifierOrSkipLine(const char *&First, const char *const End); + + /// Used when it is certain that next token is an identifier. + LLVM_NODISCARD StringRef lexIdentifier(const char *&First, + const char *const End); + + /// Lexes next token and returns true iff it is an identifier that matches \p + /// Id, otherwise it skips the current line and returns false. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id, + const char *&First, + const char *const End); + LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First, + const char *const End); LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End); - Directive &pushDirective(DirectiveKind K) { - Directives.emplace_back(K, Out.size()); - return Directives.back(); + LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First, + const char *const End); + LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind, + const char *&First, + const char *const End); + void lexPPDirectiveBody(const char *&First, const char *const End); + + DirectiveWithTokens &pushDirective(DirectiveKind Kind) { + Tokens.append(CurDirToks); + DirsWithToks.emplace_back(Kind, CurDirToks.size()); + CurDirToks.clear(); + return DirsWithToks.back(); } void popDirective() { - 
Out.resize(Directives.back().Offset); - Directives.pop_back(); + Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); } DirectiveKind topDirective() const { - return Directives.empty() ? pp_none : Directives.back().Kind; + return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; } - Scanner &put(char Byte) { - Out.push_back(Byte); - return *this; - } - Scanner &append(StringRef S) { return append(S.begin(), S.end()); } - Scanner &append(const char *First, const char *Last) { - Out.append(First, Last); - return *this; + unsigned getOffsetAt(const char *CurPtr) const { + return CurPtr - Input.data(); } - void printToNewline(const char *&First, const char *const End); - void printAdjacentModuleNameParts(const char *&First, const char *const End); - LLVM_NODISCARD bool printAtImportBody(const char *&First, - const char *const End); - void printDirectiveBody(const char *&First, const char *const End); - void printAdjacentMacroArgs(const char *&First, const char *const End); - LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); - /// Reports a diagnostic if the diagnostic engine is provided. Always returns /// true at the end. bool reportError(const char *CurPtr, unsigned Err); StringMap SplitIds; StringRef Input; + SmallVectorImpl &Tokens; DiagnosticsEngine *Diags; SourceLocation InputSourceLoc; + + /// Keeps track of the tokens for the currently lexed directive. Once a + /// directive is fully lexed and "committed" then the tokens get appended to + /// \p Tokens and \p CurDirToks is cleared for the next directive. + SmallVector CurDirToks; + /// The directives that were lexed along with the number of tokens that each + /// directive contains. The tokens of all the directives are kept in \p Tokens + /// vector, in the same order as the directives order in \p DirsWithToks. 
+ SmallVector DirsWithToks; + LangOptions LangOpts; + Lexer TheLexer; }; } // end anonymous namespace @@ -112,7 +168,7 @@ if (!Diags) return true; assert(CurPtr >= Input.data() && "invalid buffer ptr"); - Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); return true; } @@ -265,30 +321,6 @@ } } -static const char *findLastNonSpace(const char *First, const char *Last) { - assert(First <= Last); - while (First != Last && isHorizontalWhitespace(Last[-1])) - --Last; - return Last; -} - -static const char *findLastNonSpaceNonBackslash(const char *First, - const char *Last) { - assert(First <= Last); - while (First != Last && - (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\')) - --Last; - return Last; -} - -static const char *findFirstTrailingSpace(const char *First, const char *Last) { - const char *LastNonSpace = findLastNonSpace(First, Last); - if (Last == LastNonSpace) - return Last; - assert(isHorizontalWhitespace(LastNonSpace[0])); - return LastNonSpace + 1; -} - static void skipLineComment(const char *&First, const char *const End) { assert(First[0] == '/' && First[1] == '/'); First += 2; @@ -396,67 +428,6 @@ skipLine(First, End); } -void Scanner::printToNewline(const char *&First, const char *const End) { - while (First != End && !isVerticalWhitespace(*First)) { - const char *Last = First; - do { - // Iterate over strings correctly to avoid comments and newlines. - if (*Last == '"' || *Last == '\'' || - (*Last == '<' && - (topDirective() == pp_include || topDirective() == pp_import))) { - if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) - skipRawString(Last, End); - else - skipString(Last, End); - continue; - } - if (*Last != '/' || End - Last < 2) { - ++Last; - continue; // Gather the rest up to print verbatim. - } - - if (Last[1] != '/' && Last[1] != '*') { - ++Last; - continue; - } - - // Deal with "//..." and "/*...*/". 
- append(First, findFirstTrailingSpace(First, Last)); - First = Last; - - if (Last[1] == '/') { - skipLineComment(First, End); - return; - } - - put(' '); - skipBlockComment(First, End); - skipOverSpaces(First, End); - Last = First; - } while (Last != End && !isVerticalWhitespace(*Last)); - - // Print out the string. - const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); - if (Last == End || LastBeforeTrailingSpace == First || - LastBeforeTrailingSpace[-1] != '\\') { - append(First, LastBeforeTrailingSpace); - First = Last; - skipNewline(First, End); - return; - } - - // Print up to the last character that's not a whitespace or backslash. - // Then print exactly one space, which matters when tokens are separated by - // a line continuation. - append(First, findLastNonSpaceNonBackslash(First, Last)); - put(' '); - - First = Last; - skipNewline(First, End); - skipOverSpaces(First, End); - } -} - static void skipWhitespace(const char *&First, const char *const End) { for (;;) { assert(First <= End); @@ -489,176 +460,134 @@ } } -void Scanner::printAdjacentModuleNameParts(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.')); - append(First, Last); - First = Last; -} - -bool Scanner::printAtImportBody(const char *&First, const char *const End) { +bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, + const char *const End) { + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; - - if (isVerticalWhitespace(*First)) { - skipNewline(First, End); - continue; - } - - // Found a semicolon. - if (*First == ';') { - put(*First++).put('\n'); - return false; - } - - // Don't handle macro expansions inside @import for now. 
- if (!isAsciiIdentifierContinue(*First) && *First != '.') - return true; - - printAdjacentModuleNameParts(First, End); + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eof)) + return reportError( + DirectiveLoc, + diag::err_dep_source_scanner_missing_semi_after_at_import); + if (Tok.is(tok::semi)) + break; } + pushDirective(Kind); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + return false; } -void Scanner::printDirectiveBody(const char *&First, const char *const End) { - skipWhitespace(First, End); // Skip initial whitespace. - printToNewline(First, End); - while (Out.back() == ' ') - Out.pop_back(); - put('\n'); -} +dependency_directives_scan::Token &Scanner::lexToken(const char *&First, + const char *const End) { + clang::Token Tok; + TheLexer.LexFromRawLexer(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First <= End); -LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, - const char *const End) { - assert(isAsciiIdentifierContinue(*First) && "invalid identifer"); - const char *Last = First + 1; - while (Last != End && isAsciiIdentifierContinue(*Last)) - ++Last; - return Last; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -LLVM_NODISCARD static const char * -getIdentifierContinuation(const char *First, const char *const End) { - if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) - return nullptr; +dependency_directives_scan::Token & +Scanner::lexIncludeFilename(const char *&First, const char *const End) { + clang::Token Tok; + TheLexer.LexIncludeFilename(Tok); + First = Input.data() + TheLexer.getCurrentBufferOffset(); + assert(First 
<= End); - ++First; - skipNewline(First, End); - if (First == End) - return nullptr; - return isAsciiIdentifierContinue(First[0]) ? First : nullptr; -} - -Scanner::IdInfo Scanner::lexIdentifier(const char *First, - const char *const End) { - const char *Last = lexRawIdentifier(First, End); - const char *Next = getIdentifierContinuation(Last, End); - if (LLVM_LIKELY(!Next)) - return IdInfo{Last, StringRef(First, Last - First)}; - - // Slow path, where identifiers are split over lines. - SmallVector Id(First, Last); - while (Next) { - Last = lexRawIdentifier(Next, End); - Id.append(Next, Last); - Next = getIdentifierContinuation(Last, End); - } - return IdInfo{ - Last, - SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; + unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); + CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), + Tok.getFlags()); + return CurDirToks.back(); } -void Scanner::printAdjacentMacroArgs(const char *&First, - const char *const End) { - // Skip over parts of the body. - const char *Last = First; - do - ++Last; - while (Last != End && - (isAsciiIdentifierContinue(*Last) || *Last == '.' 
|| *Last == ',')); - append(First, Last); - First = Last; +void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { + while (true) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(tok::eod)) + break; + } } -bool Scanner::printMacroArgs(const char *&First, const char *const End) { - assert(*First == '('); - put(*First++); - for (;;) { - skipWhitespace(First, End); - if (First == End) - return true; +LLVM_NODISCARD Optional +Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.isNot(tok::raw_identifier)) { + if (!Tok.is(tok::eod)) + skipLine(First, End); + return None; + } - if (*First == ')') { - put(*First++); - return false; - } + bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; + if (LLVM_LIKELY(!NeedsCleaning)) + return Input.slice(Tok.Offset, Tok.getEnd()); - // This is intentionally fairly liberal. - if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ',')) - return true; + SmallString<64> Spelling; + Spelling.resize(Tok.Length); - printAdjacentMacroArgs(First, End); + unsigned SpellingLength = 0; + const char *BufPtr = Input.begin() + Tok.Offset; + const char *AfterIdent = Input.begin() + Tok.getEnd(); + while (BufPtr < AfterIdent) { + unsigned Size; + Spelling[SpellingLength++] = + Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + BufPtr += Size; } + + return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) + .first->first(); } -/// Looks for an identifier starting from Last. -/// -/// Updates "First" to just past the next identifier, if any. Returns true iff -/// the identifier matches "Id". 
-bool Scanner::isNextIdentifier(StringRef Id, const char *&First, - const char *const End) { - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) - return false; +StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { + Optional Id = tryLexIdentifierOrSkipLine(First, End); + assert(Id.hasValue() && "expected identifier token"); + return Id.getValue(); +} - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - return FoundId.Name == Id; +bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, + const char *const End) { + if (Optional FoundId = tryLexIdentifierOrSkipLine(First, End)) { + if (*FoundId == Id) + return true; + skipLine(First, End); + } + return false; } bool Scanner::lexAt(const char *&First, const char *const End) { // Handle "@import". - const char *ImportLoc = First++; - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); - return false; - } - pushDirective(decl_at_import); - append("@import "); - if (printAtImportBody(First, End)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); - skipWhitespace(First, End); - if (First == End) + + // Lex '@'. 
+ const dependency_directives_scan::Token &AtTok = lexToken(First, End); + assert(AtTok.is(tok::at)); + (void)AtTok; + + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); - return false; + return lexModuleDirectiveBody(decl_at_import, First, End); } bool Scanner::lexModule(const char *&First, const char *const End) { - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = lexIdentifier(First, End); bool Export = false; - if (Id.Name == "export") { + if (Id == "export") { Export = true; - skipWhitespace(First, End); - if (!isAsciiIdentifierContinue(*First)) { - skipLine(First, End); + Optional NextId = tryLexIdentifierOrSkipLine(First, End); + if (!NextId) return false; - } - Id = lexIdentifier(First, End); - First = Id.Last; + Id = *NextId; } - if (Id.Name != "module" && Id.Name != "import") { + if (Id != "module" && Id != "import") { skipLine(First, End); return false; } @@ -680,114 +609,51 @@ } } - if (Export) { - pushDirective(cxx_export_decl); - append("export "); - } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); - if (Id.Name == "module") - pushDirective(cxx_module_decl); + DirectiveKind Kind; + if (Id == "module") + Kind = Export ? cxx_export_module_decl : cxx_module_decl; else - pushDirective(cxx_import_decl); - append(Id.Name); - append(" "); - printToNewline(First, End); - append("\n"); - return false; -} - -bool Scanner::lexDefine(const char *&First, const char *const End) { - pushDirective(pp_define); - append("#define "); - skipWhitespace(First, End); + Kind = Export ? 
cxx_export_import_decl : cxx_import_decl; - if (!isAsciiIdentifierStart(*First)) - return reportError(First, diag::err_pp_macro_not_identifier); - - IdInfo Id = lexIdentifier(First, End); - const char *Last = Id.Last; - append(Id.Name); - if (Last == End) - return false; - if (*Last == '(') { - size_t Size = Out.size(); - if (printMacroArgs(Last, End)) { - // Be robust to bad macro arguments, since they can show up in disabled - // code. - Out.resize(Size); - append("(/* invalid */\n"); - skipLine(Last, End); - return false; - } - } - skipWhitespace(Last, End); - if (Last == End) - return false; - if (!isVerticalWhitespace(*Last)) - put(' '); - printDirectiveBody(Last, End); - First = Last; - return false; + return lexModuleDirectiveBody(Kind, First, End); } bool Scanner::lexPragma(const char *&First, const char *const End) { - // #pragma. - skipWhitespace(First, End); - if (First == End || !isAsciiIdentifierStart(*First)) + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - IdInfo FoundId = lexIdentifier(First, End); - First = FoundId.Last; - if (FoundId.Name == "once") { - // #pragma once - skipLine(First, End); - pushDirective(pp_pragma_once); - append("#pragma once\n"); - return false; - } - if (FoundId.Name == "push_macro") { - // #pragma push_macro - pushDirective(pp_pragma_push_macro); - append("#pragma push_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "pop_macro") { - // #pragma pop_macro - pushDirective(pp_pragma_pop_macro); - append("#pragma pop_macro"); - printDirectiveBody(First, End); - return false; - } - if (FoundId.Name == "include_alias") { - // #pragma include_alias - pushDirective(pp_pragma_include_alias); - append("#pragma include_alias"); - printDirectiveBody(First, End); + StringRef Id = FoundId.getValue(); + auto Kind = llvm::StringSwitch(Id) + .Case("once", pp_pragma_once) + .Case("push_macro", pp_pragma_push_macro) + .Case("pop_macro", pp_pragma_pop_macro) + 
.Case("include_alias", pp_pragma_include_alias) + .Default(pp_none); + if (Kind != pp_none) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); return false; } - if (FoundId.Name != "clang") { + if (Id != "clang") { skipLine(First, End); return false; } // #pragma clang. - if (!isNextIdentifier("module", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("module", First, End)) return false; - } // #pragma clang module. - if (!isNextIdentifier("import", First, End)) { - skipLine(First, End); + if (!isNextIdentifierOrSkipLine("import", First, End)) return false; - } // #pragma clang module import. + lexPPDirectiveBody(First, End); pushDirective(pp_pragma_import); - append("#pragma clang module import "); - printDirectiveBody(First, End); return false; } @@ -808,14 +674,13 @@ return false; } - return lexDefault(pp_endif, "endif", First, End); + return lexDefault(pp_endif, First, End); } -bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive, - const char *&First, const char *const End) { +bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, + const char *const End) { + lexPPDirectiveBody(First, End); pushDirective(Kind); - put('#').append(Directive).put(' '); - printDirectiveBody(First, End); return false; } @@ -845,6 +710,14 @@ return false; } + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); + + auto ScEx1 = make_scope_exit([&]() { + /// Clear Scanner's CurDirToks before returning, in case we didn't push a + /// new directive. + CurDirToks.clear(); + }); + // Handle "@import". if (*First == '@') return lexAt(First, End); @@ -853,25 +726,26 @@ return lexModule(First, End); // Handle preprocessing directives. - ++First; // Skip over '#'. 
- skipWhitespace(First, End); - if (First == End) - return reportError(First, diag::err_pp_expected_eol); + TheLexer.setParsingPreprocessorDirective(true); + auto ScEx2 = make_scope_exit( + [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - if (!isAsciiIdentifierStart(*First)) { - skipLine(First, End); + // Lex '#'. + const dependency_directives_scan::Token &HashTok = lexToken(First, End); + assert(HashTok.is(tok::hash)); + (void)HashTok; + + Optional FoundId = tryLexIdentifierOrSkipLine(First, End); + if (!FoundId) return false; - } - // Figure out the token. - IdInfo Id = lexIdentifier(First, End); - First = Id.Last; + StringRef Id = FoundId.getValue(); - if (Id.Name == "pragma") + if (Id == "pragma") return lexPragma(First, End); - auto Kind = llvm::StringSwitch(Id.Name) + auto Kind = llvm::StringSwitch(Id) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) .Case("define", pp_define) @@ -888,18 +762,26 @@ .Case("endif", pp_endif) .Default(pp_none); if (Kind == pp_none) { - skipDirective(Id.Name, First, End); + skipDirective(Id, First, End); return false; } if (Kind == pp_endif) return lexEndif(First, End); - if (Kind == pp_define) - return lexDefine(First, End); + switch (Kind) { + case pp_include: + case pp___include_macros: + case pp_include_next: + case pp_import: + lexIncludeFilename(First, End); + break; + default: + break; + } // Everything else. - return lexDefault(Kind, Id.Name, First, End); + return lexDefault(Kind, First, End); } static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { @@ -916,78 +798,65 @@ return false; } -bool Scanner::scan() { +bool Scanner::scan(SmallVectorImpl &Directives) { bool Error = scanImpl(Input.begin(), Input.end()); if (!Error) { - // Add a trailing newline and an EOF on success. - if (!Out.empty() && Out.back() != '\n') - Out.push_back('\n'); + // Add an EOF on success. pushDirective(pp_eof); } - // Null-terminate the output. 
This way the memory buffer that's passed to - // Clang will not have to worry about the terminating '\0'. - Out.push_back(0); - Out.pop_back(); + ArrayRef RemainingTokens = Tokens; + for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { + assert(RemainingTokens.size() >= DirWithToks.NumTokens); + Directives.emplace_back(DirWithToks.Kind, + RemainingTokens.take_front(DirWithToks.NumTokens)); + RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); + } + assert(RemainingTokens.empty()); + return Error; } -bool clang::dependency_directives_scan::computeSkippedRanges( - ArrayRef Input, llvm::SmallVectorImpl &Range) { - struct IfElseDirective { - enum DirectiveKind { - If, // if/ifdef/ifndef - Else // elif/elifdef/elifndef, else - }; - int Offset; - DirectiveKind Kind; +bool clang::scanSourceForDependencyDirectives( + StringRef Input, SmallVectorImpl &Tokens, + SmallVectorImpl &Directives, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); +} + +void clang::printDependencyDirectivesAsSource( + StringRef Source, + ArrayRef Directives, + llvm::raw_ostream &OS) { + // Add a space separator where it is convenient for testing purposes. 
+ auto needsSpaceSeparator = + [](tok::TokenKind Prev, + const dependency_directives_scan::Token &Tok) -> bool { + if (Prev == Tok.Kind) + return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, + tok::r_square); + if (Prev == tok::raw_identifier && + Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, + tok::char_constant, tok::header_name)) + return true; + if (Prev == tok::r_paren && + Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, + tok::char_constant, tok::unknown)) + return true; + if (Prev == tok::comma && + Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) + return true; + return false; }; - llvm::SmallVector Offsets; - for (const Directive &T : Input) { - switch (T.Kind) { - case pp_if: - case pp_ifdef: - case pp_ifndef: - Offsets.push_back({T.Offset, IfElseDirective::If}); - break; - - case pp_elif: - case pp_elifdef: - case pp_elifndef: - case pp_else: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - Offsets.push_back({T.Offset, IfElseDirective::Else}); - break; - } - case pp_endif: { - if (Offsets.empty()) - return true; - int PreviousOffset = Offsets.back().Offset; - Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); - do { - IfElseDirective::DirectiveKind Kind = Offsets.pop_back_val().Kind; - if (Kind == IfElseDirective::If) - break; - } while (!Offsets.empty()); - break; - } - default: - break; + for (const dependency_directives_scan::Directive &Directive : Directives) { + Optional PrevTokenKind; + for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { + if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) + OS << ' '; + PrevTokenKind = Tok.Kind; + OS << Source.slice(Tok.Offset, Tok.getEnd()); } } - return false; -} - -bool clang::scanSourceForDependencyDirectives( - StringRef Input, SmallVectorImpl &Output, - SmallVectorImpl &Directives, DiagnosticsEngine *Diags, 
- SourceLocation InputSourceLoc) { - Output.clear(); - Directives.clear(); - return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan(); } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -226,13 +226,11 @@ return L; } -bool Lexer::skipOver(unsigned NumBytes) { - IsAtPhysicalStartOfLine = true; - IsAtStartOfLine = true; - if ((BufferPtr + NumBytes) > BufferEnd) - return true; - BufferPtr += NumBytes; - return false; +void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) { + this->IsAtPhysicalStartOfLine = IsAtStartOfLine; + this->IsAtStartOfLine = IsAtStartOfLine; + assert((BufferStart + Offset) <= BufferEnd); + BufferPtr = BufferStart + Offset; } template static void StringifyImpl(T &Str, char Quote) { @@ -2939,6 +2937,13 @@ unsigned Lexer::isNextPPTokenLParen() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + if (isDependencyDirectivesLexer()) { + if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) + return 2; + return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::l_paren); + } + // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. @@ -3281,6 +3286,8 @@ } bool Lexer::Lex(Token &Result) { + assert(!isDependencyDirectivesLexer()); + // Start a new token. Result.startToken(); @@ -4102,3 +4109,129 @@ // We parsed the directive; lex a token with the new state. 
return false; } + +const char *Lexer::convertDependencyDirectiveToken( + const dependency_directives_scan::Token &DDTok, Token &Result) { + const char *TokPtr = BufferStart + DDTok.Offset; + Result.startToken(); + Result.setLocation(getSourceLocation(TokPtr)); + Result.setKind(DDTok.Kind); + Result.setFlag((Token::TokenFlags)DDTok.Flags); + Result.setLength(DDTok.Length); + BufferPtr = TokPtr + DDTok.Length; + return TokPtr; +} + +bool Lexer::LexDependencyDirectiveToken(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { + if (DepDirectives.front().Kind == pp_eof) + return LexEndOfFile(Result, BufferEnd); + NextDepDirectiveTokenIndex = 0; + DepDirectives = DepDirectives.drop_front(); + } + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++]; + + const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); + + if (Result.is(tok::hash) && Result.isAtStartOfLine()) { + PP->HandleDirective(Result); + return false; + } + if (Result.is(tok::raw_identifier)) { + Result.setRawIdentifierData(TokPtr); + if (!isLexingRawMode()) { + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (II->isHandleIdentifierCase()) + return PP->HandleIdentifier(Result); + } + return true; + } + if (Result.isLiteral()) { + Result.setLiteralData(TokPtr); + return true; + } + if (Result.is(tok::colon) && + (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) { + // Convert consecutive colons to 'tok::coloncolon'. 
+ if (*BufferPtr == ':') { + assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( + tok::colon)); + ++NextDepDirectiveTokenIndex; + Result.setKind(tok::coloncolon); + } + return true; + } + if (Result.is(tok::eod)) + ParsingPreprocessorDirective = false; + + return true; +} + +bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { + assert(isDependencyDirectivesLexer()); + + using namespace dependency_directives_scan; + + bool Stop = false; + unsigned NestedIfs = 0; + do { + DepDirectives = DepDirectives.drop_front(); + switch (DepDirectives.front().Kind) { + case pp_none: + llvm_unreachable("unexpected 'pp_none'"); + case pp_include: + case pp___include_macros: + case pp_define: + case pp_undef: + case pp_import: + case pp_pragma_import: + case pp_pragma_once: + case pp_pragma_push_macro: + case pp_pragma_pop_macro: + case pp_pragma_include_alias: + case pp_include_next: + case decl_at_import: + case cxx_module_decl: + case cxx_import_decl: + case cxx_export_module_decl: + case cxx_export_import_decl: + break; + case pp_if: + case pp_ifdef: + case pp_ifndef: + ++NestedIfs; + break; + case pp_elif: + case pp_elifdef: + case pp_elifndef: + case pp_else: + if (!NestedIfs) { + Stop = true; + } + break; + case pp_endif: + if (!NestedIfs) { + Stop = true; + } else { + --NestedIfs; + } + break; + case pp_eof: + return LexEndOfFile(Result, BufferEnd); + } + } while (!Stop); + + const dependency_directives_scan::Token &DDTok = + DepDirectives.front().Tokens.front(); + assert(DDTok.is(tok::hash)); + NextDepDirectiveTokenIndex = 1; + + convertDependencyDirectiveToken(DDTok, Result); + return false; +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -443,41 +443,6 @@ return DiscardUntilEndOfDirective().getEnd(); } -Optional Preprocessor::getSkippedRangeForExcludedConditionalBlock( - SourceLocation HashLoc) { - if 
(!ExcludedConditionalDirectiveSkipMappings) - return None; - if (!HashLoc.isFileID()) - return None; - - std::pair HashFileOffset = - SourceMgr.getDecomposedLoc(HashLoc); - Optional Buf = - SourceMgr.getBufferOrNone(HashFileOffset.first); - if (!Buf) - return None; - auto It = - ExcludedConditionalDirectiveSkipMappings->find(Buf->getBufferStart()); - if (It == ExcludedConditionalDirectiveSkipMappings->end()) - return None; - - const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond(); - // Check if the offset of '#' is mapped in the skipped ranges. - auto MappingIt = SkippedRanges.find(HashFileOffset.second); - if (MappingIt == SkippedRanges.end()) - return None; - - unsigned BytesToSkip = MappingIt->getSecond(); - unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset(); - assert(CurLexerBufferOffset >= HashFileOffset.second && - "lexer is before the hash?"); - // Take into account the fact that the lexer has already advanced, so the - // number of bytes to skip must be adjusted. - unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; - assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); - return BytesToSkip - LengthDiff; -} - void Preprocessor::SuggestTypoedDirective(const Token &Tok, StringRef Directive, const SourceLocation &EndLoc) const { @@ -527,36 +492,42 @@ // disabling warnings, etc. CurPPLexer->LexingRawMode = true; Token Tok; - if (auto SkipLength = - getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) { - // Skip to the next '#endif' / '#else' / '#elif'. 
- CurLexer->skipOver(*SkipLength); - } SourceLocation endLoc; while (true) { - CurLexer->Lex(Tok); + if (CurLexer->isDependencyDirectivesLexer()) { + CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok); + } else { + while (true) { + CurLexer->Lex(Tok); - if (Tok.is(tok::code_completion)) { - setCodeCompletionReached(); - if (CodeComplete) - CodeComplete->CodeCompleteInConditionalExclusion(); - continue; - } + if (Tok.is(tok::code_completion)) { + setCodeCompletionReached(); + if (CodeComplete) + CodeComplete->CodeCompleteInConditionalExclusion(); + continue; + } - // If this is the end of the buffer, we have an error. - if (Tok.is(tok::eof)) { - // We don't emit errors for unterminated conditionals here, - // Lexer::LexEndOfFile can do that properly. - // Just return and let the caller lex after this #include. - if (PreambleConditionalStack.isRecording()) - PreambleConditionalStack.SkipInfo.emplace( - HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc); - break; - } + // If this is the end of the buffer, we have an error. + if (Tok.is(tok::eof)) { + // We don't emit errors for unterminated conditionals here, + // Lexer::LexEndOfFile can do that properly. + // Just return and let the caller lex after this #include. + if (PreambleConditionalStack.isRecording()) + PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc, + FoundNonSkipPortion, + FoundElse, ElseLoc); + break; + } - // If this token is not a preprocessor directive, just skip it. - if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) - continue; + // If this token is not a preprocessor directive, just skip it. + if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) + continue; + + break; + } + } + if (Tok.is(tok::eof)) + break; // We just parsed a # character at the start of a line, so we're in // directive mode. 
Tell the lexer this so any newlines we see will be diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -91,8 +91,19 @@ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset); } - EnterSourceFileWithLexer( - new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir); + Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); + if (getPreprocessorOpts().DependencyDirectivesForFile && + FID != PredefinesFileID) { + if (Optional File = SourceMgr.getFileEntryRefForID(FID)) { + if (Optional> + DepDirectives = + getPreprocessorOpts().DependencyDirectivesForFile(*File)) { + TheLexer->DepDirectives = *DepDirectives; + } + } + } + + EnterSourceFileWithLexer(TheLexer, CurDir); return false; } @@ -110,7 +121,9 @@ CurDirLookup = CurDir; CurLexerSubmodule = nullptr; if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_Lexer; + CurLexerKind = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -158,11 +158,6 @@ if (this->PPOpts->GeneratePreamble) PreambleConditionalStack.startRecording(); - ExcludedConditionalDirectiveSkipMappings = - this->PPOpts->ExcludedConditionalDirectiveSkipMappings; - if (ExcludedConditionalDirectiveSkipMappings) - ExcludedConditionalDirectiveSkipMappings->clear(); - MaxTokens = LangOpts.MaxTokens; } @@ -382,7 +377,9 @@ void Preprocessor::recomputeCurLexerKind() { if (CurLexer) - CurLexerKind = CLK_Lexer; + CurLexerKind = CurLexer->isDependencyDirectivesLexer() + ? 
CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) CurLexerKind = CLK_TokenLexer; else @@ -645,6 +642,9 @@ case CLK_CachingLexer: CachingLex(Tok); break; + case CLK_DependencyDirectivesLexer: + CurLexer->LexDependencyDirectiveToken(Tok); + break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Tok); break; @@ -906,6 +906,9 @@ CachingLex(Result); ReturnedToken = true; break; + case CLK_DependencyDirectivesLexer: + ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); + break; case CLK_LexAfterModuleImport: ReturnedToken = LexAfterModuleImport(Result); break; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" -#include "clang/Lex/DependencyDirectivesScanner.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -44,64 +43,41 @@ EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldScanForDirectives(Filename, Entry.getUniqueID())) - return EntryRef(/*Minimized=*/false, Filename, Entry); + !shouldScanForDirectives(Filename)) + return EntryRef(Filename, Entry); CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // Double-checked locking. 
- if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); std::lock_guard GuardLock(Contents->ValueLock); // Double-checked locking. - if (Contents->MinimizedAccess.load()) - return EntryRef(/*Minimized=*/true, Filename, Entry); + if (Contents->DepDirectives.load()) + return EntryRef(Filename, Entry); - llvm::SmallString<1024> MinimizedFileContents; - // Minimize the file down to directives that might affect the dependencies. - SmallVector Tokens; + SmallVector Directives; + // Scan the file for preprocessor directives that might affect the + // dependencies. if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), - MinimizedFileContents, Tokens)) { + Contents->DepDirectiveTokens, + Directives)) { + Contents->DepDirectiveTokens.clear(); // FIXME: Propagate the diagnostic if desired by the client. - // Use the original file if the minimization failed. - Contents->MinimizedStorage = - llvm::MemoryBuffer::getMemBuffer(*Contents->Original); - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + Contents->DepDirectives.store(new Optional()); + return EntryRef(Filename, Entry); } - // The contents produced by the minimizer must be null terminated. - assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && - "not null terminated contents"); - - // Compute the skipped PP ranges that speedup skipping over inactive - // preprocessor blocks. - llvm::SmallVector SkippedRanges; - dependency_directives_scan::computeSkippedRanges(Tokens, SkippedRanges); - PreprocessorSkippedRangeMapping Mapping; - for (const auto &Range : SkippedRanges) { - if (Range.Length < 16) { - // Ignore small ranges as non-profitable. - // FIXME: This is a heuristic, its worth investigating the tradeoffs - // when it should be applied. 
- continue; - } - Mapping[Range.Offset] = Range.Length; - } - Contents->PPSkippedRangeMapping = std::move(Mapping); - - Contents->MinimizedStorage = std::make_unique( - std::move(MinimizedFileContents)); - // This function performed double-checked locking using `MinimizedAccess`. - // Assigning it must be the last thing this function does. If we were to - // assign it before `PPSkippedRangeMapping`, other threads may skip the - // critical section (`MinimizedAccess != nullptr`) and access the mappings - // that are about to be initialized, leading to a data race. - Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); - return EntryRef(/*Minimized=*/true, Filename, Entry); + // This function performed double-checked locking using `DepDirectives`. + // Assigning it must be the last thing this function does, otherwise other + // threads may skip the + // critical section (`DepDirectives != nullptr`), leading to a data race. + Contents->DepDirectives.store( + new Optional(std::move(Directives))); + return EntryRef(Filename, Entry); } DependencyScanningFilesystemSharedCache:: @@ -208,19 +184,9 @@ return shouldScanForDirectivesBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::disableDirectivesScanning( - StringRef Filename) { - // Since we're not done setting up `NotToBeScanned` yet, we need to disable - // directive scanning explicitly. 
- if (llvm::ErrorOr Result = getOrCreateFileSystemEntry( - Filename, /*DisableDirectivesScanning=*/true)) - NotToBeScanned.insert(Result->getStatus().getUniqueID()); -} - bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( - StringRef Filename, llvm::sys::fs::UniqueID UID) { - return shouldScanForDirectivesBasedOnExtension(Filename) && - !NotToBeScanned.contains(UID); + StringRef Filename) { + return shouldScanForDirectivesBasedOnExtension(Filename); } const CachedFileSystemEntry & @@ -307,9 +273,7 @@ llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} - static llvm::ErrorOr> - create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings); + static llvm::ErrorOr> create(EntryRef Entry); llvm::ErrorOr status() override { return Stat; } @@ -329,8 +293,7 @@ } // end anonymous namespace llvm::ErrorOr> -DepScanFile::create(EntryRef Entry, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings) { +DepScanFile::create(EntryRef Entry) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) @@ -342,10 +305,6 @@ /*RequiresNullTerminator=*/false), Entry.getStatus()); - const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); - if (EntrySkipMappings && !EntrySkipMappings->empty()) - PPSkipMappings[Result->Buffer->getBufferStart()] = EntrySkipMappings; - return llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } @@ -358,5 +317,5 @@ llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); - return DepScanFile::create(Result.get(), PPSkipMappings); + return DepScanFile::create(Result.get()); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -137,12 +137,11 @@ DependencyScanningAction( StringRef WorkingDirectory, 
DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings, ScanningOutputFormat Format, bool OptimizeArgs, llvm::Optional ModuleName = None) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), - DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format), - OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {} + DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs), + ModuleName(ModuleName) {} bool runInvocation(std::shared_ptr Invocation, FileManager *FileMgr, @@ -183,29 +182,21 @@ // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - DepFS->enableDirectivesScanningOfAllFiles(); - // Don't minimize any files that contributed to prebuilt modules. The - // implicit build validates the modules by comparing the reported sizes of - // their inputs to the current state of the filesystem. Minimization would - // throw this mechanism off. - for (const auto &File : PrebuiltModulesInputFiles) - DepFS->disableDirectivesScanning(File.getKey()); - // Don't minimize any files that were explicitly passed in the build - // settings and that might be opened. - for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->disableDirectivesScanning(E.Path); - for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->disableDirectivesScanning(F); - // Support for virtual file system overlays on top of the caching // filesystem. FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation( ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS)); - // Pass the skip mappings which should speed up excluded conditional block - // skipping in the preprocessor. 
- ScanInstance.getPreprocessorOpts() - .ExcludedConditionalDirectiveSkipMappings = &PPSkipMappings; + llvm::IntrusiveRefCntPtr LocalDepFS = + DepFS; + ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile = + [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File) + -> Optional> { + if (llvm::ErrorOr Entry = + LocalDepFS->getOrCreateFileSystemEntry(File.getName())) + return Entry->getDirectiveTokens(); + return None; + }; } // Create the dependency collector that will collect the produced @@ -262,7 +253,6 @@ StringRef WorkingDirectory; DependencyConsumer &Consumer; llvm::IntrusiveRefCntPtr DepFS; - ExcludedPreprocessorDirectiveSkipMapping &PPSkipMappings; ScanningOutputFormat Format; bool OptimizeArgs; llvm::Optional ModuleName; @@ -289,7 +279,7 @@ if (Service.getMode() == ScanningMode::DependencyDirectivesScan) DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(), - RealFS, PPSkipMappings); + RealFS); if (Service.canReuseFileManager()) Files = new FileManager(FileSystemOptions(), RealFS); } @@ -340,8 +330,8 @@ return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(), [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) { DependencyScanningAction Action( - WorkingDirectory, Consumer, DepFS, PPSkipMappings, - Format, OptimizeArgs, ModuleName); + WorkingDirectory, Consumer, DepFS, Format, + OptimizeArgs, ModuleName); // Create an invocation that uses the underlying file // system to ensure that any file system requests that // are made by the driver do not go through the diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -1,3 +1,4 @@ -// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 +// RUN: 
%clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s -#define 0 0 // expected-error {{macro name must be an identifier}} +#define 0 0 +// CHECK: #define 0 0 diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c @@ -15,7 +15,7 @@ #pragma include_alias(, "mystring.h") // CHECK: #pragma once -// CHECK-NEXT: #pragma push_macro( "MYMACRO" ) +// CHECK-NEXT: #pragma push_macro("MYMACRO") // CHECK-NEXT: #pragma pop_macro("MYMACRO") // CHECK-NEXT: #pragma clang module import mymodule // CHECK-NEXT: #pragma include_alias(, "mystring.h") diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -14,39 +14,58 @@ using namespace clang; using namespace clang::dependency_directives_scan; -static bool minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out) { - SmallVector Directives; - return scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl &Out, + SmallVectorImpl &Tokens, + SmallVectorImpl &Directives) { + Out.clear(); + Tokens.clear(); + Directives.clear(); + if (scanSourceForDependencyDirectives(Input, Tokens, Directives)) + return true; + + raw_svector_ostream OS(Out); + printDependencyDirectivesAsSource(Input, Directives, OS); + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + Out.push_back('\0'); + Out.pop_back(); + + return false; } -static bool -minimizeSourceToDependencyDirectives(StringRef Input, - SmallVectorImpl &Out, - SmallVectorImpl &Directives) { - return 
scanSourceForDependencyDirectives(Input, Out, Directives); +static bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Tokens; + SmallVector Directives; + return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives); } namespace { TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("", Out, Tokens, Directives)); EXPECT_TRUE(Out.empty()); + EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens, + Directives)); EXPECT_TRUE(Out.empty()); + EXPECT_TRUE(Tokens.empty()); ASSERT_EQ(1u, Directives.size()); ASSERT_EQ(pp_eof, Directives.back().Kind); } -TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) { +TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; ASSERT_FALSE( @@ -71,7 +90,7 @@ "#pragma include_alias(, )\n" "export module m;\n" "import m;\n", - Out, Directives)); + Out, Tokens, Directives)); EXPECT_EQ(pp_define, Directives[0].Kind); EXPECT_EQ(pp_undef, Directives[1].Kind); EXPECT_EQ(pp_endif, Directives[2].Kind); @@ -91,19 +110,28 @@ EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind); EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind); EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind); - EXPECT_EQ(cxx_export_decl, Directives[19].Kind); - EXPECT_EQ(cxx_module_decl, Directives[20].Kind); - EXPECT_EQ(cxx_import_decl, Directives[21].Kind); - EXPECT_EQ(pp_eof, Directives[22].Kind); + EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind); + EXPECT_EQ(cxx_import_decl, Directives[20].Kind); + EXPECT_EQ(pp_eof, 
Directives[21].Kind); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, EmptyHash) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#\n#define MACRO a\n", Out)); + EXPECT_STREQ("#define MACRO a\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; - ASSERT_FALSE( - minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out, + Tokens, Directives)); EXPECT_STREQ("#define MACRO\n", Out.data()); + ASSERT_EQ(4u, Tokens.size()); ASSERT_EQ(2u, Directives.size()); ASSERT_EQ(pp_define, Directives.front().Kind); } @@ -144,25 +172,25 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO con tent ", Out)); - EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO() con tent ", Out)); - EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO((a))\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(\n", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO(a*b)\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { @@ -170,19 +198,19 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\t)\tcon \t tent\t", Out)); 
- EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\f)\fcon \f tent\f", Out)); - EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO(\v)\vcon \v tent\v", Out)); - EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out)); - EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) { @@ -255,25 +283,27 @@ TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) { SmallVector Out; - ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) { SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND&\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" "&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND\\\n" + "&\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) { @@ -303,6 +333,14 @@ Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CommentSlashSlashStar) { + SmallVector Out; + + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO 1 //* blah */\n", Out)); + EXPECT_STREQ("#define MACRO 1\n", Out.data()); +} + TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) { SmallVector Out; @@ -481,6 +519,9 @@ ASSERT_FALSE( minimizeSourceToDependencyDirectives("#__include_macros \n", Out)); EXPECT_STREQ("#__include_macros \n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out)); + EXPECT_STREQ("#include MACRO\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { @@ -507,8 +548,9 @@ SmallVector Out; ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); - ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); } TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { @@ -559,7 +601,8 @@ "#define GUARD\n" "#endif\n", Out)); - EXPECT_STREQ("#ifndef GUARD\n" + EXPECT_STREQ("#if\\\n" + "ndef GUARD\n" "#define GUARD\n" "#endif\n", Out.data()); @@ -567,12 +610,16 @@ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\n" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r" "RD\n", Out)); - EXPECT_STREQ("#define GUARD\n", Out.data()); + EXPECT_STREQ("#define GUA\\\r" + "RD\n", + Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" " RD\n", @@ -588,7 +635,10 @@ "2 + \\\t\n" "3\n", Out)); - EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data()); + EXPECT_STREQ("#define A 1+\\ \n" + "2+\\\t\n" + "3\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { @@ -682,6 +732,7 @@ 
TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"(// comment @@ -689,7 +740,8 @@ // another comment #include )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); EXPECT_STREQ("#pragma once\n#include \n", Out.data()); ASSERT_EQ(Directives.size(), 3u); EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once); @@ -700,7 +752,7 @@ #include )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#pragma once\n#include \n", Out.data()); + EXPECT_STREQ("#pragma once extra tokens\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, @@ -755,11 +807,12 @@ Source = "#define X \"\\ \r\nx\n#include \n"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#define X \"\\ \r\nx\n#include \n", Out.data()); + EXPECT_STREQ("#define X\"\\ \r\nx\n#include \n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; + SmallVector Tokens; SmallVector Directives; StringRef Source = R"( @@ -789,81 +842,17 @@ import f(->a = 3); } )"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n" - "export import :l [[rename]];\n" - "import <<= 3;\nimport a b d e d e f e;\n" - "import foo [[no_unique_address]];\nimport foo();\n" - "import f(:sefse);\nimport f(->a = 3);\n", + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); + EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" + "exp\\\nort import:l[[rename]];" + "import<<=3;import a b d e d e f e;" + "import foo[[no_unique_address]];import foo();" + "import f(:sefse);import f(->a=3);\n", Out.data()); - ASSERT_EQ(Directives.size(), 12u); - 
EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include); - EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasic) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 1u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesBasicElifdef) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifdef BLAH\n" - "void skip();\n" - "#elifdef BLAM\n" - "void skip();\n" - "#elifndef GUARD\n" - "#define GUARD\n" - "void foo();\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - EXPECT_EQ(Ranges.size(), 3u); - EXPECT_EQ(Ranges[0].Offset, 0); - EXPECT_EQ(Ranges[0].Length, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elifdef")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset, (int)Out.find("#elifndef")); - EXPECT_EQ(Ranges[2].Offset + Ranges[2].Length, (int)Out.rfind("#endif")); -} - -TEST(MinimizeSourceToDependencyDirectivesTest, SkippedPPRangesNested) { - SmallString<128> Out; - SmallVector Directives; - StringRef Source = "#ifndef GUARD\n" - "#define GUARD\n" - "#if FOO\n" - "#include hello\n" - "#elif BAR\n" - "#include bye\n" - "#endif\n" - "#else\n" - "#include nothing\n" - "#endif\n"; - ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives)); - SmallVector Ranges; - ASSERT_FALSE(computeSkippedRanges(Directives, Ranges)); - 
EXPECT_EQ(Ranges.size(), 4u); - EXPECT_EQ(Ranges[0].Offset, (int)Out.find("#if FOO")); - EXPECT_EQ(Ranges[0].Offset + Ranges[0].Length, (int)Out.find("#elif")); - EXPECT_EQ(Ranges[1].Offset, (int)Out.find("#elif BAR")); - EXPECT_EQ(Ranges[1].Offset + Ranges[1].Length, (int)Out.find("#endif")); - EXPECT_EQ(Ranges[2].Offset, 0); - EXPECT_EQ(Ranges[2].Length, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset, (int)Out.find("#else")); - EXPECT_EQ(Ranges[3].Offset + Ranges[3].Length, (int)Out.rfind("#endif")); + ASSERT_EQ(Directives.size(), 10u); + EXPECT_EQ(Directives[0].Kind, pp_include); + EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); } } // end anonymous namespace diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -204,53 +204,5 @@ EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h"); } -namespace dependencies { -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity. 
- auto StatusMinimized0 = DepFS.status("/mod.h"); - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusMinimized0); - EXPECT_TRUE(StatusFull1); - EXPECT_EQ(StatusMinimized0->getSize(), 17u); - EXPECT_EQ(StatusFull1->getSize(), 30u); - EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h")); -} - -TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { - auto VFS = llvm::makeIntrusiveRefCnt(); - VFS->addFile("/mod.h", 0, - llvm::MemoryBuffer::getMemBuffer("#include \n" - "// hi there!\n")); - - DependencyScanningFilesystemSharedCache SharedCache; - ExcludedPreprocessorDirectiveSkipMapping Mappings; - DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS, Mappings); - - DepFS.disableDirectivesScanning("/mod.h"); - auto StatusFull0 = DepFS.status("/mod.h"); - DepFS.enableDirectivesScanningOfAllFiles(); - auto StatusMinimized1 = DepFS.status("/mod.h"); - - EXPECT_TRUE(StatusFull0); - EXPECT_TRUE(StatusMinimized1); - EXPECT_EQ(StatusFull0->getSize(), 30u); - EXPECT_EQ(StatusMinimized1->getSize(), 17u); - EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h")); - EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h")); -} - -} // end namespace dependencies } // end namespace tooling } // end namespace clang