Index: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td @@ -818,4 +818,13 @@ } +let CategoryName = "Dependency Directive Source Minimization Issue" in { + +def err_dep_source_minimizer_missing_sema_after_at_import : Error< + "could not find ';' after @import">; +def err_dep_source_minimizer_unexpected_tokens_at_import : Error< + "unexpected extra tokens at end of @import declaration">; + +} + } Index: cfe/trunk/include/clang/Driver/CC1Options.td =================================================================== --- cfe/trunk/include/clang/Driver/CC1Options.td +++ cfe/trunk/include/clang/Driver/CC1Options.td @@ -612,6 +612,9 @@ HelpText<"Migrate source code">; def compiler_options_dump : Flag<["-"], "compiler-options-dump">, HelpText<"Dump the compiler configuration options">; +def print_dependency_directives_minimized_source : Flag<["-"], + "print-dependency-directives-minimized-source">, + HelpText<"Print the output of the dependency directives source minimizer">; } def emit_llvm_uselists : Flag<["-"], "emit-llvm-uselists">, Index: cfe/trunk/include/clang/Frontend/FrontendActions.h =================================================================== --- cfe/trunk/include/clang/Frontend/FrontendActions.h +++ cfe/trunk/include/clang/Frontend/FrontendActions.h @@ -240,6 +240,17 @@ bool usesPreprocessorOnly() const override { return true; } }; +class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction { +protected: + void ExecuteAction() override; + std::unique_ptr CreateASTConsumer(CompilerInstance &, + StringRef) override { + return nullptr; + } + + bool usesPreprocessorOnly() const override { return true; } +}; + //===----------------------------------------------------------------------===// // Preprocessor Actions 
//===----------------------------------------------------------------------===// Index: cfe/trunk/include/clang/Frontend/FrontendOptions.h =================================================================== --- cfe/trunk/include/clang/Frontend/FrontendOptions.h +++ cfe/trunk/include/clang/Frontend/FrontendOptions.h @@ -128,7 +128,10 @@ MigrateSource, /// Just lex, no output. - RunPreprocessorOnly + RunPreprocessorOnly, + + /// Print the output of the dependency directives source minimizer. + PrintDependencyDirectivesSourceMinimizerOutput }; } // namespace frontend Index: cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h =================================================================== --- cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h +++ cfe/trunk/include/clang/Lex/DependencyDirectivesSourceMinimizer.h @@ -0,0 +1,88 @@ +//===- clang/Lex/DependencyDirectivesSourceMinimizer.h - ----------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the interface for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H +#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { + +class DiagnosticsEngine; + +namespace minimize_source_to_dependency_directives { + +/// Represents the kind of preprocessor directive or a module declaration that +/// is tracked by the source minimizer in its token output. +enum TokenKind { + pp_none, + pp_include, + pp___include_macros, + pp_define, + pp_undef, + pp_import, + pp_pragma_import, + pp_include_next, + pp_if, + pp_ifdef, + pp_ifndef, + pp_elif, + pp_else, + pp_endif, + decl_at_import, + pp_eof, +}; + +/// Represents a simplified token that's lexed as part of the source +/// minimization. It's used to track the location of various preprocessor +/// directives that could potentially have an effect on the dependencies. +struct Token { + /// The kind of token. + TokenKind K = pp_none; + + /// Offset into the output byte stream of where the directive begins. + int Offset = -1; + + Token(TokenKind K, int Offset) : K(K), Offset(Offset) {} +}; + +} // end namespace minimize_source_to_dependency_directives + +/// Minimize the input down to the preprocessor directives that might have +/// an effect on the dependencies for a compilation unit. +/// +/// This function deletes all non-preprocessor code, and strips anything that +/// can't affect what gets included. It canonicalizes whitespace where +/// convenient to stabilize the output against formatting changes in the input. +/// +/// Clears the output vectors at the beginning of the call. +/// +/// \returns false on success, true on error.
If the diagnostic engine is not +/// null, an appropriate error is reported using the given input location +/// with the offset that corresponds to the minimizer's current buffer offset. +bool minimizeSourceToDependencyDirectives( + llvm::StringRef Input, llvm::SmallVectorImpl &Output, + llvm::SmallVectorImpl + &Tokens, + DiagnosticsEngine *Diags = nullptr, + SourceLocation InputSourceLoc = SourceLocation()); + +} // end namespace clang + +#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -1696,6 +1696,10 @@ Opts.ProgramAction = frontend::MigrateSource; break; case OPT_Eonly: Opts.ProgramAction = frontend::RunPreprocessorOnly; break; + case OPT_print_dependency_directives_minimized_source: + Opts.ProgramAction = + frontend::PrintDependencyDirectivesSourceMinimizerOutput; + break; } } @@ -3116,6 +3120,7 @@ case frontend::PrintPreprocessedInput: case frontend::RewriteMacros: case frontend::RunPreprocessorOnly: + case frontend::PrintDependencyDirectivesSourceMinimizerOutput: return true; } llvm_unreachable("invalid frontend action"); Index: cfe/trunk/lib/Frontend/FrontendActions.cpp =================================================================== --- cfe/trunk/lib/Frontend/FrontendActions.cpp +++ cfe/trunk/lib/Frontend/FrontendActions.cpp @@ -14,6 +14,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/Utils.h" +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" @@ -23,8 +24,8 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include 
"llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -908,3 +909,33 @@ OS << "}"; } + +void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { + CompilerInstance &CI = getCompilerInstance(); + SourceManager &SM = CI.getPreprocessor().getSourceManager(); + const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID()); + + llvm::SmallString<1024> Output; + llvm::SmallVector Toks; + if (minimizeSourceToDependencyDirectives( + FromFile->getBuffer(), Output, Toks, &CI.getDiagnostics(), + SM.getLocForStartOfFile(SM.getMainFileID()))) { + assert(CI.getDiagnostics().hasErrorOccurred() && + "no errors reported for failure"); + + // Preprocess the source when verifying the diagnostics to capture the + // 'expected' comments. + if (CI.getDiagnosticOpts().VerifyDiagnostics) { + // Make sure we don't emit new diagnostics! + CI.getDiagnostics().setSuppressAllDiagnostics(); + Preprocessor &PP = getCompilerInstance().getPreprocessor(); + PP.EnterMainSourceFile(); + Token Tok; + do { + PP.Lex(Tok); + } while (Tok.isNot(tok::eof)); + } + return; + } + llvm::outs() << Output; +} Index: cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ cfe/trunk/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -116,6 +116,8 @@ case RunAnalysis: Action = "RunAnalysis"; break; #endif case RunPreprocessorOnly: return llvm::make_unique(); + case PrintDependencyDirectivesSourceMinimizerOutput: + return llvm::make_unique(); } #if !CLANG_ENABLE_ARCMT || !CLANG_ENABLE_STATIC_ANALYZER \ Index: cfe/trunk/lib/Lex/CMakeLists.txt =================================================================== --- cfe/trunk/lib/Lex/CMakeLists.txt +++ cfe/trunk/lib/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangLex + DependencyDirectivesSourceMinimizer.cpp HeaderMap.cpp HeaderSearch.cpp 
Lexer.cpp Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp =================================================================== --- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -0,0 +1,756 @@ +//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace { + +struct Minimizer { + /// Minimized output. + SmallVectorImpl &Out; + /// The known tokens encountered during the minimization. + SmallVectorImpl &Tokens; + + Minimizer(SmallVectorImpl &Out, SmallVectorImpl &Tokens, + StringRef Input, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) + : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), + InputSourceLoc(InputSourceLoc) {} + + /// Lex the provided source and emit the minimized output. + /// + /// \returns True on error. 
+ bool minimize(); + +private: + struct IdInfo { + const char *Last; + StringRef Name; + }; + + /// Lex an identifier. + /// + /// \pre First points at a valid identifier head. + LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); + LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, + const char *const End); + LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); + LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); + LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End); + Token &makeToken(TokenKind K) { + Tokens.emplace_back(K, Out.size()); + return Tokens.back(); + } + void popToken() { + Out.resize(Tokens.back().Offset); + Tokens.pop_back(); + } + TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } + + Minimizer &put(char Byte) { + Out.push_back(Byte); + return *this; + } + Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } + Minimizer &append(const char *First, const char *Last) { + Out.append(First, Last); + return *this; + } + + void printToNewline(const char *&First, const char *const End); + void printAdjacentModuleNameParts(const char *&First, const char *const End); + LLVM_NODISCARD bool printAtImportBody(const char *&First, + const char *const End); + void printDirectiveBody(const char *&First, const char *const End); + void printAdjacentMacroArgs(const char *&First, const char *const End); + LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + + /// Reports a diagnostic if the diagnostic engine is provided. 
Always returns + /// true at the end. + bool reportError(const char *CurPtr, unsigned Err); + + StringMap SplitIds; + StringRef Input; + DiagnosticsEngine *Diags; + SourceLocation InputSourceLoc; +}; + +} // end anonymous namespace + +bool Minimizer::reportError(const char *CurPtr, unsigned Err) { + if (!Diags) + return true; + assert(CurPtr >= Input.data() && "invalid buffer ptr"); + Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { + while (First != End && isHorizontalWhitespace(*First)) + ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, + const char *Current) { + assert(First <= Current); + + // Check if we can even back up. + if (*Current != '\"' || First == Current) + return false; + + // Check for an "R". + --Current; + if (*Current != 'R') + return false; + if (First == Current || !isIdentifierBody(*--Current)) + return true; + + // Check for a prefix of "u", "U", or "L". + if (*Current == 'u' || *Current == 'U' || *Current == 'L') + return First == Current || !isIdentifierBody(*--Current); + + // Check for a prefix of "u8"; step back onto the 'u' before comparing. + if (*Current != '8' || First == Current || *--Current != 'u') + return false; + return First == Current || !isIdentifierBody(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { + assert(First[0] == '\"'); + assert(First[-1] == 'R'); + + const char *Last = ++First; + while (Last != End && *Last != '(') + ++Last; + if (Last == End) { + First = Last; // Hit the end... just give up. + return; + } + + StringRef Terminator(First, Last - First); + for (;;) { + // Move First to just past the next ")". + First = Last; + while (First != End && *First != ')') + ++First; + if (First == End) + return; + ++First; + + // Look ahead for the terminator sequence.
+ Last = First; + while (Last != End && size_t(Last - First) < Terminator.size() && + Terminator[Last - First] == *Last) + ++Last; + + // Check if we hit it (or the end of the file). + if (Last == End) { + First = Last; + return; + } + if (size_t(Last - First) < Terminator.size()) + continue; + if (*Last != '\"') + continue; + First = Last + 1; + return; + } +} + +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '\"'); + const char Terminator = *First; + for (++First; First != End && *First != Terminator; ++First) + if (*First == '\\') + if (++First == End) + return; + if (First != End) + ++First; // Finish off the string. +} + +static void skipNewline(const char *&First, const char *End) { + assert(isVerticalWhitespace(*First)); + ++First; + if (First == End) + return; + + // Check for "\n\r" and "\r\n". + if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) + ++First; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { + for (;;) { + if (First == End) + return; + + if (isVerticalWhitespace(*First)) + return; + + while (!isVerticalWhitespace(*First)) + if (++First == End) + return; + + if (First[-1] != '\\') + return; + + skipNewline(First, End); // Keep going, past a two-byte newline too...
+ } +} + +static const char *reverseOverSpaces(const char *First, const char *Last) { + assert(First <= Last); + while (First != Last && isHorizontalWhitespace(Last[-1])) + --Last; + return Last; +} + +static void skipLineComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '/'); + First += 2; + skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '*'); + if (End - First < 4) { + First = End; + return; + } + for (First += 3; First != End; ++First) + if (First[-1] == '*' && First[0] == '/') { + ++First; + return; + } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, + const char *const Cur, + const char *const End) { + assert(*Cur == '\'' && "expected quotation character"); + // skipLine called in places where we don't expect a valid number + // body before `start` on the same line, so always return false at the start. + if (Start == Cur) + return false; + // The previous character must be a valid PP number character. + if (!isPreprocessingNumberBody(*(Cur - 1))) + return false; + // The next character should be a valid identifier body character. + return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { + do { + assert(First <= End); + if (First == End) + return; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + return; + } + const char *Start = First; + while (First != End && !isVerticalWhitespace(*First)) { + // Iterate over strings correctly to avoid comments and newlines. + if (*First == '\"' || + (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + if (isRawStringLiteral(Start, First)) + skipRawString(First, End); + else + skipString(First, End); + continue; + } + + // Iterate over comments correctly. 
+ if (*First != '/' || End - First < 2) { + ++First; + continue; + } + + if (First[1] == '/') { + // "//...". + skipLineComment(First, End); + continue; + } + + if (First[1] != '*') { + ++First; + continue; + } + + // "/*...*/". + skipBlockComment(First, End); + } + if (First == End) + return; + + // Skip over the newline. + assert(isVerticalWhitespace(*First)); + skipNewline(First, End); + } while (First[-2] == '\\'); // Continue past line-continuations. +} + +static void skipDirective(StringRef Name, const char *&First, + const char *const End) { + if (llvm::StringSwitch(Name) + .Case("warning", true) + .Case("error", true) + .Default(false)) + // Do not process quotes or comments. + skipToNewlineRaw(First, End); + else + skipLine(First, End); +} + +void Minimizer::printToNewline(const char *&First, const char *const End) { + while (First != End && !isVerticalWhitespace(*First)) { + const char *Last = First; + do { + // Iterate over strings correctly to avoid comments and newlines. + if (*Last == '\"' || *Last == '\'') { + if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) + skipRawString(Last, End); + else + skipString(Last, End); + continue; + } + if (*Last != '/' || End - Last < 2) { + ++Last; + continue; // Gather the rest up to print verbatim. + } + + if (Last[1] != '/' && Last[1] != '*') { + ++Last; + continue; + } + + // Deal with "//..." and "/*...*/". + append(First, reverseOverSpaces(First, Last)); + First = Last; + + if (Last[1] == '/') { + skipLineComment(First, End); + return; + } + + put(' '); + skipBlockComment(First, End); + skipOverSpaces(First, End); + Last = First; + } while (Last != End && !isVerticalWhitespace(*Last)); + + // Print out the string. + if (Last == End || Last == First || Last[-1] != '\\') { + append(First, reverseOverSpaces(First, Last)); + return; + } + + // Print up to the backslash, backing up over spaces. 
+ append(First, reverseOverSpaces(First, Last - 1)); + + First = Last; + skipNewline(First, End); + skipOverSpaces(First, End); + } +} + +static void skipWhitespace(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + skipOverSpaces(First, End); + + if (End - First < 2) + return; + + if (First[0] == '\\' && isVerticalWhitespace(First[1])) { + skipNewline(++First, End); + continue; + } + + // Check for a non-comment character. + if (First[0] != '/') + return; + + // "// ...". + if (First[1] == '/') { + skipLineComment(First, End); + return; + } + + // Cannot be a comment. + if (First[1] != '*') + return; + + // "/*...*/". + skipBlockComment(First, End); + } +} + +void Minimizer::printAdjacentModuleNameParts(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); + append(First, Last); + First = Last; +} + +bool Minimizer::printAtImportBody(const char *&First, const char *const End) { + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + continue; + } + + // Found a semicolon. + if (*First == ';') { + put(*First++).put('\n'); + return false; + } + + // Don't handle macro expansions inside @import for now. + if (!isIdentifierBody(*First) && *First != '.') + return true; + + printAdjacentModuleNameParts(First, End); + } +} + +void Minimizer::printDirectiveBody(const char *&First, const char *const End) { + skipWhitespace(First, End); // Skip initial whitespace. 
+ printToNewline(First, End); + while (Out.back() == ' ') + Out.pop_back(); + put('\n'); +} + +LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, + const char *const End) { + assert(isIdentifierBody(*First) && "invalid identifer"); + const char *Last = First + 1; + while (Last != End && isIdentifierBody(*Last)) + ++Last; + return Last; +} + +LLVM_NODISCARD static const char * +getIdentifierContinuation(const char *First, const char *const End) { + if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) + return nullptr; + + ++First; + skipNewline(First, End); + if (First == End) + return nullptr; + return isIdentifierBody(First[0]) ? First : nullptr; +} + +Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, + const char *const End) { + const char *Last = lexRawIdentifier(First, End); + const char *Next = getIdentifierContinuation(Last, End); + if (LLVM_LIKELY(!Next)) + return IdInfo{Last, StringRef(First, Last - First)}; + + // Slow path, where identifiers are split over lines. + SmallVector Id(First, Last); + while (Next) { + Last = lexRawIdentifier(Next, End); + Id.append(Next, Last); + Next = getIdentifierContinuation(Last, End); + } + return IdInfo{ + Last, + SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; +} + +void Minimizer::printAdjacentMacroArgs(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && + (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); + append(First, Last); + First = Last; +} + +bool Minimizer::printMacroArgs(const char *&First, const char *const End) { + assert(*First == '('); + put(*First++); + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (*First == ')') { + put(*First++); + return false; + } + + // This is intentionally fairly liberal. + if (!(isIdentifierBody(*First) || *First == '.' 
|| *First == ',')) + return true; + + printAdjacentMacroArgs(First, End); + } +} + +/// Looks for an identifier starting from First. +/// +/// Updates "First" to just past the next identifier, if any. Returns true iff +/// the identifier matches "Id". +bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { + skipWhitespace(First, End); + if (First == End || !isIdentifierHead(*First)) + return false; + + IdInfo FoundId = lexIdentifier(First, End); + First = FoundId.Last; + return FoundId.Name == Id; +} + +bool Minimizer::lexAt(const char *&First, const char *const End) { + // Handle "@import". + const char *ImportLoc = First++; + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + makeToken(decl_at_import); + append("@import "); + if (printAtImportBody(First, End)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); + skipNewline(First, End); + return false; +} + +bool Minimizer::lexDefine(const char *&First, const char *const End) { + makeToken(pp_define); + append("#define "); + skipWhitespace(First, End); + + if (First == End || !isIdentifierHead(*First)) + return reportError(First, diag::err_pp_macro_not_identifier); + + IdInfo Id = lexIdentifier(First, End); + const char *Last = Id.Last; + append(Id.Name); + if (Last == End) + return false; + if (*Last == '(') { + size_t Size = Out.size(); + if (printMacroArgs(Last, End)) { + // Be robust to bad macro arguments, since they can show up in disabled + // code.
+ Out.resize(Size); + append("(/* invalid */\n"); + skipLine(Last, End); + return false; + } + } + skipWhitespace(Last, End); + if (Last == End) + return false; + if (!isVerticalWhitespace(*Last)) + put(' '); + printDirectiveBody(Last, End); + First = Last; + return false; +} + +bool Minimizer::lexPragma(const char *&First, const char *const End) { + // #pragma. + if (!isNextIdentifier("clang", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang. + if (!isNextIdentifier("module", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module. + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module import. + makeToken(pp_pragma_import); + append("#pragma clang module import "); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexEndif(const char *&First, const char *const End) { + // Strip out "#else" if it's empty. + if (top() == pp_else) + popToken(); + + // Strip out "#elif" if they're empty. + while (top() == pp_elif) + popToken(); + + // If "#if" is empty, strip it and skip the "#endif". + if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) { + popToken(); + skipLine(First, End); + return false; + } + + return lexDefault(pp_endif, "endif", First, End); +} + +bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End) { + makeToken(Kind); + put('#').append(Directive).put(' '); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexPPLine(const char *&First, const char *const End) { + assert(First != End); + + skipWhitespace(First, End); + assert(First <= End); + if (First == End) + return false; + + if (*First != '#' && *First != '@') { + skipLine(First, End); + assert(First <= End); + return false; + } + + // Handle "@import". + if (*First == '@') + return lexAt(First, End); + + // Handle preprocessing directives. + ++First; // Skip over '#'. 
+ skipWhitespace(First, End); + + if (First == End) + return reportError(First, diag::err_pp_expected_eol); + + if (!isIdentifierHead(*First)) { + skipLine(First, End); + return false; + } + + // Figure out the token. + IdInfo Id = lexIdentifier(First, End); + First = Id.Last; + auto Kind = llvm::StringSwitch(Id.Name) + .Case("include", pp_include) + .Case("__include_macros", pp___include_macros) + .Case("define", pp_define) + .Case("undef", pp_undef) + .Case("import", pp_import) + .Case("include_next", pp_include_next) + .Case("if", pp_if) + .Case("ifdef", pp_ifdef) + .Case("ifndef", pp_ifndef) + .Case("elif", pp_elif) + .Case("else", pp_else) + .Case("endif", pp_endif) + .Case("pragma", pp_pragma_import) + .Default(pp_none); + if (Kind == pp_none) { + skipDirective(Id.Name, First, End); + return false; + } + + if (Kind == pp_endif) + return lexEndif(First, End); + + if (Kind == pp_define) + return lexDefine(First, End); + + if (Kind == pp_pragma_import) + return lexPragma(First, End); + + // Everything else. + return lexDefault(Kind, Id.Name, First, End); +} + +bool Minimizer::minimizeImpl(const char *First, const char *const End) { + while (First != End) + if (lexPPLine(First, End)) + return true; + return false; +} + +bool Minimizer::minimize() { + bool Error = minimizeImpl(Input.begin(), Input.end()); + + if (!Error) { + // Add a trailing newline and an EOF on success. + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + makeToken(pp_eof); + } + + // Null-terminate the output. This way the memory buffer that's passed to + // Clang will not have to worry about the terminating '\0'. 
+ Out.push_back(0); + Out.pop_back(); + return Error; +} + +bool clang::minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl &Output, + SmallVectorImpl &Tokens, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + Output.clear(); + Tokens.clear(); + return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); +} Index: cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c =================================================================== --- cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c +++ cfe/trunk/test/Frontend/minimize_source_to_dependency_directives.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s > %t +// RUN: echo END. >> %t +// RUN: FileCheck < %t %s + +#ifdef FOO +#include "a.h" +#else +void skipThisCode(); +#endif + +// CHECK: #ifdef FOO +// CHECK-NEXT: #include "a.h" +// CHECK-NEXT: #endif +// CHECK-NEXT: END. Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m =================================================================== --- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m +++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 + +@import x; a // expected-error {{unexpected extra tokens at end of @import declaration}} Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m =================================================================== --- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m +++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 + +@import x // expected-error {{could not find ';' after @import}} 
Index: cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c =================================================================== --- cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c +++ cfe/trunk/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 + +#define 0 0 // expected-error {{macro name must be an identifier}} Index: cfe/trunk/unittests/Lex/CMakeLists.txt =================================================================== --- cfe/trunk/unittests/Lex/CMakeLists.txt +++ cfe/trunk/unittests/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ ) add_clang_unittest(LexTests + DependencyDirectivesSourceMinimizerTest.cpp HeaderMapTest.cpp HeaderSearchTest.cpp LexerTest.cpp Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp =================================================================== --- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -0,0 +1,508 @@ +//===- unittests/Lex/DependencyDirectivesSourceMinimizer.cpp - -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "llvm/ADT/SmallString.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace clang { + /* Test-only convenience overload: calls the three-argument minimizeSourceToDependencyDirectives with a scratch Tokens vector, so tests that only inspect the text output need not declare one. Returns that call's result unchanged. */+bool minimizeSourceToDependencyDirectives(StringRef Input, + SmallVectorImpl &Out) { + SmallVector Tokens; + return minimizeSourceToDependencyDirectives(Input, Out, Tokens); +} + +} // end namespace clang + /* Tests for minimizeSourceToDependencyDirectives. Each case passes a source snippet and checks the minimized text left in Out and, where a Tokens vector is supplied, the recorded token kinds. Snippets expected to minimize cleanly are wrapped in ASSERT_FALSE; the rejection cases use ASSERT_TRUE. Expected text is compared with EXPECT_STREQ on Out.data(), i.e. as a C string (this relies on a terminating NUL just past the end of Out, presumably written by the minimizer - confirm there). The last three tests in the file cover identifiers split by a backslash line continuation (SplitIdentifier), #warning and #error lines containing an unterminated quote, an unclosed comment opener or a trailing continuation (PoundWarningAndError), and apostrophes in ordinary code between two #include lines (CharacterLiteral). */+namespace { + /* Empty input, and input with no directives at all, succeed with no output text and exactly one token of kind pp_eof. */+TEST(MinimizeSourceToDependencyDirectivesTest, Empty) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens)); + EXPECT_TRUE(Out.empty()); + ASSERT_EQ(1u, Tokens.size()); + ASSERT_EQ(pp_eof, Tokens.back().K); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens)); + EXPECT_TRUE(Out.empty()); + ASSERT_EQ(1u, Tokens.size()); + ASSERT_EQ(pp_eof, Tokens.back().K); +} + /* Feeds one line for each of fourteen directive kinds and checks that Tokens records the matching kind for each, in source order, followed by pp_eof. */+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define A\n" + "#undef A\n" + "#endif\n" + "#if A\n" + "#ifdef A\n" + "#ifndef A\n" + "#elif A\n" + "#else\n" + "#include \n" + "#include_next \n" + "#__include_macros \n" + "#import \n" + "@import A;\n" + "#pragma clang module import A\n", + Out, Tokens)); + EXPECT_EQ(pp_define, Tokens[0].K); + EXPECT_EQ(pp_undef, Tokens[1].K); + EXPECT_EQ(pp_endif, Tokens[2].K); + EXPECT_EQ(pp_if, Tokens[3].K); + EXPECT_EQ(pp_ifdef, Tokens[4].K); + EXPECT_EQ(pp_ifndef, Tokens[5].K); + EXPECT_EQ(pp_elif, Tokens[6].K); + EXPECT_EQ(pp_else, Tokens[7].K); + EXPECT_EQ(pp_include, Tokens[8].K); + EXPECT_EQ(pp_include_next, Tokens[9].K); + EXPECT_EQ(pp___include_macros, Tokens[10].K); + EXPECT_EQ(pp_import, Tokens[11].K); + EXPECT_EQ(decl_at_import, 
Tokens[12].K); + EXPECT_EQ(pp_pragma_import, Tokens[13].K); + EXPECT_EQ(pp_eof, Tokens[14].K); +} + /* A single #define with no trailing newline is emitted with a newline appended and yields two tokens, the first of kind pp_define. */+TEST(MinimizeSourceToDependencyDirectivesTest, Define) { + SmallVector Out; + SmallVector Tokens; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + ASSERT_EQ(2u, Tokens.size()); + ASSERT_EQ(pp_define, Tokens.front().K); +} + /* Spaces before the end of a #define line and blank lines after it are not copied to the output. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO \n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO a \n\n\n", Out)); + EXPECT_STREQ("#define MACRO a\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out)); + EXPECT_STREQ("#define MACRO\n", Out.data()); +} + /* Function-like and object-like macros: the space after the comma in a parameter list is removed, the body is kept, and spaces at the end of the body are removed. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out)); + EXPECT_STREQ("#define MACRO()\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out)); + EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO content", Out)); + EXPECT_STREQ("#define MACRO content\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO con tent ", Out)); + EXPECT_STREQ("#define MACRO con tent\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO() con tent ", Out)); + EXPECT_STREQ("#define MACRO() con tent\n", Out.data()); +} + /* Parameter lists that are nested, unterminated or contain an operator still return false; the expected output stops after the opening parenthesis and carries an invalid marker comment. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) { + SmallVector Out; + + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); + EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); +} + /* Tab, form feed and vertical tab: occurrences inside the empty parameter list and at the end of the line are dropped, the run between the name or parameter list and the body becomes one space, and whitespace inside the body is preserved as written. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO(\t)\tcon \t tent\t", Out)); + EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO(\f)\fcon \f tent\f", Out)); + EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO(\v)\vcon \v tent\v", Out)); + EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out)); + EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data()); +} + /* Backslash-newline continuations inside the parameter list and the macro body are removed together with the spaces around them, giving a single-line #define. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a \\\n" + " )", + Out)); + EXPECT_STREQ("#define MACRO(a)\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a, \\\n" + " b) \\\n" + " call((a), \\\n" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + /* Same input as the second DefineMultilineArgs case, with a bare carriage return after each backslash. */+TEST(MinimizeSourceToDependencyDirectivesTest, + DefineMultilineArgsCarriageReturn) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a, \\\r" + " b) \\\r" + " call((a), \\\r" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + /* Same again, with carriage return plus newline after each backslash. */+TEST(MinimizeSourceToDependencyDirectivesTest, + 
DefineMultilineArgsCarriageReturnNewline) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a, \\\r\n" + " b) \\\r\n" + " call((a), \\\r\n" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + /* Same again, with newline plus carriage return after each backslash. */+TEST(MinimizeSourceToDependencyDirectivesTest, + DefineMultilineArgsNewlineCarriageReturn) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO(a, \\\n\r" + " b) \\\n\r" + " call((a), \\\n\r" + " (b))", + Out)); + EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data()); +} + /* A #define whose name position holds the number 0 makes minimization return true. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) { + SmallVector Out; + + ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out)); +} + /* A #define followed by an ampersand instead of a name makes minimization return true. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) { + SmallVector Out; + + ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out)); +} + /* A body that follows the macro name without a space, directly or across a backslash-newline, is separated from the name by one space in the output. */+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); + EXPECT_STREQ("#define AND &\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" + "&\n", + Out)); + EXPECT_STREQ("#define AND &\n", Out.data()); +} + /* A block comment opened at the end of a #define line runs across the following lines, so the #define inside it is not emitted; a block comment on the later #include line is dropped as well. */+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#define MACRO a/*\n" + " /*\n" + "#define MISSING abc\n" + " /*\n" + " /* something */ \n" + "#include /* \"def\" */ \n", + Out)); + EXPECT_STREQ("#define MACRO a\n" + "#include \n", + Out.data()); +} + /* Block comment delimiters that appear inside string literals in a macro body are copied through as ordinary text. */+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO1 \"/*\"\n" + "#define MACRO2 \"*/\"\n", + Out)); + EXPECT_STREQ("#define MACRO1 \"/*\"\n" + "#define MACRO2 \"*/\"\n", + Out.data()); +} + /* Conditional blocks whose branches contain #define lines are emitted unchanged, both for a plain #ifdef and for a chain with #elif and #else. */+TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) { + 
SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "#define B\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifdef A\n" + "#define B\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "#define B\n" + "#elif B\n" + "#define C\n" + "#elif C\n" + "#define D\n" + "#else\n" + "#define E\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifdef A\n" + "#define B\n" + "#elif B\n" + "#define C\n" + "#elif C\n" + "#define D\n" + "#else\n" + "#define E\n" + "#endif\n", + Out.data()); +} + /* A conditional chain with nothing inside any branch yields empty output. */+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n" + "#elif B\n" + "#elif C\n" + "#else D\n" + "#endif\n", + Out)); + EXPECT_STREQ("", Out.data()); +} + /* Pragmas other than clang module import, including shorter prefixes of it and a misspelled import, yield empty output; the full form is emitted. */+TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#pragma clang module\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma clang module impor\n", Out)); + EXPECT_STREQ("", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma clang module import\n", Out)); + EXPECT_STREQ("#pragma clang module import\n", Out.data()); +} + /* Each of #include, #include_next, #import and #__include_macros is expected in the output exactly as given in the input. */+TEST(MinimizeSourceToDependencyDirectivesTest, Include) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \"A\"\n", Out)); + EXPECT_STREQ("#include \"A\"\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \n", Out)); + EXPECT_STREQ("#include \n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#include_next \n", Out)); + EXPECT_STREQ("#include_next \n", Out.data()); + + 
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import \n", Out)); + EXPECT_STREQ("#import \n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("#__include_macros \n", Out)); + EXPECT_STREQ("#__include_macros \n", Out.data()); +} + /* An @import declaration is expected in the output as the keyword, the dotted module name and a semicolon on one line; whitespace, line breaks and block comments between those tokens in the input are removed. */+TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); + EXPECT_STREQ("@import A;\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); + EXPECT_STREQ("@import A.B;\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out)); + EXPECT_STREQ("@import A.B;\n", Out.data()); +} + /* An @import with no semicolon, with a parenthesized macro-style operand, or with a string literal operand makes minimization return true. */+TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) { + SmallVector Out; + + ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out)); + ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); + ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); +} + /* Raw string literals in the code between directives, with and without a custom delimiter and containing characters such as hash, quote and at-sign, leave the surrounding include guard output unchanged. */+TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifndef GUARD\n" + "#define GUARD\n" + "R\"()\"\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#ifndef GUARD\n" + "#define GUARD\n" + R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw" + "\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#ifndef GUARD\n" + "#define GUARD\n" + 
R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw" + "\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#if\\\n" + "ndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out)); + EXPECT_STREQ("#ifndef GUARD\n" + "#define GUARD\n" + "#endif\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" + "RD\n", + Out)); + EXPECT_STREQ("#define GUARD\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r" + "RD\n", + Out)); + EXPECT_STREQ("#define GUARD\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n" + " RD\n", + Out)); + EXPECT_STREQ("#define GUA RD\n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) { + SmallVector Out; + + for (auto Source : { + "#warning '\n#include \n", + "#warning \"\n#include \n", + "#warning /*\n#include \n", + "#warning \\\n#include \n#include \n", + "#error '\n#include \n", + "#error \"\n#include \n", + "#error /*\n#include \n", + "#error \\\n#include \n#include \n", + }) { + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); + } + + for (auto Source : { + "#warning \\\n#include \n", + "#error \\\n#include \n", + "#if MACRO\n#warning '\n#endif\n", + "#if MACRO\n#warning \"\n#endif\n", + "#if MACRO\n#warning /*\n#endif\n", + "#if MACRO\n#error '\n#endif\n", + "#if MACRO\n#error \"\n#endif\n", + "#if MACRO\n#error /*\n#endif\n", + }) { + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("", Out.data()); + } +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteral) { + SmallVector Out; + + StringRef Source = R"( +#include +int a = 0'1; +int b = 0xfa'af'fa; +int c = 12 ' '; +#include 
+)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n#include \n", Out.data()); +} + +} // end anonymous namespace