Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -1442,6 +1442,77 @@ llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText()); } +/// Skip an empty line or a block of comment that starts a line, i.e. there is +/// no non-whitespace characters before the comment in a line. +/// A block of comment is defined as: +/// - A single line commet, i.e. "// comment". +/// - A multi-line comment, i.e. "/*..\n..\n..*/" +/// - Blocks of comments that are connected at one line. For example, +/// /* comment +/// */ +/// and +/// /* comment +/// */ // comment +/// \param Lines All lines of the code. +/// \param Line The line that being checked. +/// \param From Ignore first \p From characters in \p Line. \p From is 0 by +/// default. +/// +/// \return The number of characters (of comments) skipped. We only skip +/// comments by lines, i.e. the number of characters skipped are always a +/// multiple of lines. +unsigned skipEmptyLineAndComment(const SmallVectorImpl &Lines, + SmallVectorImpl::iterator &Line, + size_t From = 0) { + StringRef Whitespaces = " \t\n\v\f\r"; + assert(From <= Line->size()); + auto FirstCharPos = Line->find_first_not_of(Whitespaces, From); + // Skip the whole line if it contains only whitespaces. + if (FirstCharPos == StringRef::npos) + return (Line++)->size() + 1; + if ((*Line)[FirstCharPos] != '/') + return 0; + StringRef Trimmed = Line->drop_front(FirstCharPos); + unsigned SkippedOffset = 0; + auto E = Lines.end(); + if (Trimmed.startswith("//")) { + if (!Trimmed.endswith("\\")) + return (Line++)->size() + 1; + // If the line ends with "\", then skip all following lines to the first + // line that does not end with "\" (inclusive). + do { + SkippedOffset += Line->size() + 1; + ++Line; + } while (Line != E && Line->endswith("\\")); + return (Line == E) ? SkippedOffset : (SkippedOffset + (Line++)->size() + 1); + } + if (!Trimmed.startswith("/*")) + return 0; + // Look for "*/" starting from the character after "/*" to avoid matching + // "*/" from "/*/". + size_t FirstPaired = Line->find("*/", FirstCharPos + 2); + while (FirstPaired == StringRef::npos && Line != E) { + SkippedOffset += (Line++)->size() + 1; + FirstPaired = Line->find("*/"); + } + if (Line == E) + return SkippedOffset; + // It is possible that there are comments after "*/". We recursively skip + // those comments by setting the start point as FirstParied + 2 to skip + // comments after "*/". + unsigned Skipped = skipEmptyLineAndComment(Lines, Line, FirstPaired + 2); + // FIXME: handle this case: + // /* comment + // */ int x; + // The correct end offset of the comment should be right after "*/". + // Currently, we treat a line that starts with "... */" followed by + // non-comment code as a whole line of comment. + return (Skipped == 0) ? (SkippedOffset + (Line++)->size() + 1) + : (SkippedOffset + Skipped); +} + +// FIXME: program fails if we are inserting at the end of the code and program +// does not end with "\n". Will fix this in a follow-up patch. // FIXME: do not insert headers into conditional #include blocks, e.g. #includes // surrounded by compile condition "#if...". // FIXME: do not insert existing headers. @@ -1485,9 +1556,28 @@ int FirstIncludeOffset = -1; int Offset = 0; int AfterHeaderGuard = 0; + // The offset after all comments and empty lines before any actual code. + int AfterTopCommentOffset = -1; SmallVector Lines; Code.split(Lines, '\n'); - for (auto Line : Lines) { + for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I) { + if (AfterTopCommentOffset == -1) { + // Skip empty lines and comments on top of the code. + unsigned Skipped; + do { + Skipped = skipEmptyLineAndComment(Lines, I); + Offset += Skipped; + } while (Skipped > 0 && I != E); + AfterTopCommentOffset = Offset; + // If the file contains only comments and empty lines, we just insert the + // header after the last line. + if (I == E) { + // Don't let offset go beyond the file. + AfterTopCommentOffset--; + break; + } + } + StringRef Line = *I; if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; int Category = Categories.getIncludePriority( @@ -1506,14 +1596,14 @@ // - Ensure that CategoryEndOffset[Highest] is always populated. // - If CategoryEndOffset[Priority] isn't set, use the next higher value that // is set, up to CategoryEndOffset[Highest]. - // FIXME: skip comment section in the beginning of the code if there is no - // existing #include and #define. auto Highest = Priorities.begin(); if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { if (FirstIncludeOffset >= 0) CategoryEndOffsets[*Highest] = FirstIncludeOffset; - else + else if (AfterHeaderGuard > 0) CategoryEndOffsets[*Highest] = AfterHeaderGuard; + else + CategoryEndOffsets[*Highest] = AfterTopCommentOffset; } // By this point, CategoryEndOffset[Highest] is always set appropriately: // - to an appropriate location before/after existing #includes, or Index: unittests/Format/CleanupTest.cpp =================================================================== --- unittests/Format/CleanupTest.cpp +++ unittests/Format/CleanupTest.cpp @@ -465,13 +465,13 @@ TEST_F(CleanUpReplacementsTest, InsertMultipleNewHeadersAndSortLLVM) { std::string Code = "\nint x;"; - std::string Expected = "#include \"fix.h\"\n" + std::string Expected = "\n#include \"fix.h\"\n" "#include \"a.h\"\n" "#include \"b.h\"\n" "#include \"c.h\"\n" "#include \n" "#include \n" - "\nint x;"; + "int x;"; tooling::Replacements Replaces = {createInsertion("#include \"a.h\""), createInsertion("#include \"c.h\""), createInsertion("#include \"b.h\""), @@ -483,13 +483,13 @@ TEST_F(CleanUpReplacementsTest, InsertMultipleNewHeadersAndSortGoogle) { std::string Code = "\nint x;"; - std::string Expected = "#include \"fix.h\"\n" + std::string Expected = "\n#include \"fix.h\"\n" "#include \n" "#include \n" "#include \"a.h\"\n" "#include \"b.h\"\n" "#include \"c.h\"\n" - "\nint x;"; + "int x;"; tooling::Replacements Replaces = {createInsertion("#include \"a.h\""), createInsertion("#include \"c.h\""), createInsertion("#include \"b.h\""), @@ -502,21 +502,22 @@ TEST_F(CleanUpReplacementsTest, FormatCorrectLineWhenHeadersAreInserted) { std::string Code = "\n" + "int x;\n" "int a;\n" "int a;\n" "int a;"; - std::string Expected = "#include \"x.h\"\n" + std::string Expected = "\n#include \"x.h\"\n" "#include \"y.h\"\n" "#include \"clang/x/x.h\"\n" "#include \n" "#include \n" - "\n" + "int x;\n" "int a;\n" "int b;\n" "int a;"; tooling::Replacements Replaces = { - createReplacement(getOffset(Code, 3, 8), 1, "b"), + createReplacement(getOffset(Code, 4, 8), 1, "b"), createInsertion("#include "), createInsertion("#include "), createInsertion("#include \"clang/x/x.h\""), @@ -537,6 +538,54 @@ EXPECT_EQ(Expected, formatAndApply(Code, Replaces)); } +TEST_F(CleanUpReplacementsTest, SkippedTopComment) { + std::string Code = "// comment\n" + "\n" + " // comment\n"; + std::string Expected = "// comment\n" + "\n" + " // comment\n" + "#include \n"; + tooling::Replacements Replaces = {createInsertion("#include ")}; + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, SkippedMixedComments) { + std::string Code = "// comment\n" + "// comment \\\n" + " comment continued\n" + "/*\n" + "* comment\n" + "*/\n"; + std::string Expected = "// comment\n" + "// comment \\\n" + " comment continued\n" + "/*\n" + "* comment\n" + "*/\n" + "#include \n"; + tooling::Replacements Replaces = {createInsertion("#include ")}; + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, MultipleBlockCommentsInOneLine) { + std::string Code = "/*\n" + "* comment\n" + "*/ /* comment\n" + "*/\n" + "\n\n" + "/* c1 */ /*c2 */\n"; + std::string Expected = "/*\n" + "* comment\n" + "*/ /* comment\n" + "*/\n" + "\n\n" + "/* c1 */ /*c2 */\n" + "#include \n"; + tooling::Replacements Replaces = {createInsertion("#include ")}; + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + } // end namespace } // end namespace format } // end namespace clang