Index: lib/Lex/DependencyDirectivesSourceMinimizer.cpp =================================================================== --- lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -113,7 +113,8 @@ } static void skipOverSpaces(const char *&First, const char *const End) { - while (First != End && isHorizontalWhitespace(*First)) + while (First != End && + (isHorizontalWhitespace(*First) || !clang::isASCII(*First))) ++First; } @@ -185,8 +186,8 @@ } static void skipString(const char *&First, const char *const End) { - assert(*First == '\'' || *First == '"'); - const char Terminator = *First; + assert(*First == '\'' || *First == '"' || *First == '<'); + const char Terminator = *First == '<' ? '>' : *First; for (++First; First != End && *First != Terminator; ++First) if (*First == '\\') if (++First == End) @@ -195,15 +196,27 @@ ++First; // Finish off the string. } -static void skipNewline(const char *&First, const char *End) { - assert(isVerticalWhitespace(*First)); - ++First; +// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) +static unsigned isEOL(const char *First, const char *const End) { if (First == End) - return; + return 0; + if (End - First > 1) { + if (isVerticalWhitespace(First[0]) && isVerticalWhitespace(First[1]) && + First[0] != First[1]) + return 2; + } + return !!isVerticalWhitespace(First[0]); +} - // Check for "\n\r" and "\r\n". 
- if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) - ++First; +static unsigned skipNewline(const char *&First, const char *End) { + unsigned Len = isEOL(First, End); + assert(Len); + First += Len; + return Len; +} + +static bool wasLineContinuation(const char *First, unsigned Len) { + return First[-(int)Len - 1] == '\\'; } static void skipToNewlineRaw(const char *&First, const char *const End) { @@ -211,17 +224,22 @@ if (First == End) return; - if (isVerticalWhitespace(*First)) + unsigned Len = isEOL(First, End); + if (Len) return; - while (!isVerticalWhitespace(*First)) + do { if (++First == End) return; + Len = isEOL(First, End); + } while (!Len); + + First += Len; - if (First[-1] != '\\') + if (!wasLineContinuation(First, Len)) return; - ++First; // Keep going... + // Keep skipping lines... } } @@ -276,7 +294,7 @@ } static void skipLine(const char *&First, const char *const End) { - do { + for (;;) { assert(First <= End); if (First == End) return; @@ -321,9 +339,10 @@ return; // Skip over the newline. - assert(isVerticalWhitespace(*First)); - skipNewline(First, End); - } while (First[-2] == '\\'); // Continue past line-continuations. + unsigned Len = skipNewline(First, End); + if (!wasLineContinuation(First, Len)) // Continue past line-continuations. + break; + } } static void skipDirective(StringRef Name, const char *&First, @@ -350,6 +369,10 @@ skipString(Last, End); continue; } + if (top() == pp_include && *Last == '<') { + skipString(Last, End); + continue; + } if (*Last != '/' || End - Last < 2) { ++Last; continue; // Gather the rest up to print verbatim. @@ -378,6 +401,9 @@ // Print out the string. 
if (Last == End || Last == First || Last[-1] != '\\') { append(First, reverseOverSpaces(First, Last)); + + First = Last; + skipNewline(First, End); return; } Index: test/Lexer/minimize_source_to_dependency_directives_include.c =================================================================== --- test/Lexer/minimize_source_to_dependency_directives_include.c +++ test/Lexer/minimize_source_to_dependency_directives_include.c @@ -0,0 +1,8 @@ +// Test double slashes in #include directive along with angle brackets. Previously, this was interpreted as comments. +// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#include "a//b.h" +#include + +// CHECK: #include "a//b.h" +// CHECK: #include Index: test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c =================================================================== --- test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c +++ test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c @@ -0,0 +1,9 @@ +// Test invisible, bad characters just before #ifdef +// RUN: echo -n -e '\xef\xbb\xbf#ifdef TEST\n' > %t.c +// RUN: echo '#include ' >> %t.c +// RUN: echo '#endif' >> %t.c +// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %t.c 2>&1 | FileCheck %s + +// CHECK: #ifdef TEST +// CHECK-NEXT: #include +// CHECK-NEXT: #endif Index: test/Lexer/minimize_source_to_dependency_directives_invalid_error.c =================================================================== --- test/Lexer/minimize_source_to_dependency_directives_invalid_error.c +++ test/Lexer/minimize_source_to_dependency_directives_invalid_error.c @@ -0,0 +1,16 @@ +// Test that CR+LF line endings are properly handled along with quoted, multi-line #error +// RUN: cat %s | unix2dos | %clang_cc1 -DOTHER -print-dependency-directives-minimized-source 2>&1 | FileCheck %s + +#ifndef TEST +#error "message \ + more message \ + even more" +#endif + +#ifdef OTHER +#include +#endif + +// CHECK: 
#ifdef OTHER +// CHECK-NEXT: #include +// CHECK-NEXT: #endif