Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #define DEBUG_TYPE "format-formatter" @@ -1687,7 +1688,7 @@ // Returns the priority of the category which \p IncludeName belongs to. // If \p CheckMainHeader is true and \p IncludeName is a main header, returns // 0. Otherwise, returns the priority of the matching category or INT_MAX. - int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) { + int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const { int Ret = INT_MAX; for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) if (CategoryRegexs[i].match(IncludeName)) { @@ -1720,7 +1721,7 @@ bool IsMainFile; StringRef FileName; StringRef FileStem; - SmallVector CategoryRegexs; + mutable SmallVector CategoryRegexs; }; const char IncludeRegexPattern[] = @@ -1984,103 +1985,117 @@ }); } -bool isDeletedHeader(llvm::StringRef HeaderName, - const std::set &HeadersToDelete) { - return HeadersToDelete.count(HeaderName) || - HeadersToDelete.count(HeaderName.trim("\"<>")); -} +/// Generates replacements for inserting or deleting #include headers in a file. +class HeaderIncludes { +public: + HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code, + const FormatStyle &Style); + + /// Inserts an #include directive of \p IncludeName into the code. If \p + /// IncludeName is quoted e.g. <...> or "...", it will be #included as is; + /// by default, it is quoted with "". + /// + /// When searching for points to insert new header, this ignores #include's + /// after the #include block(s) in the beginning of a file to avoid inserting + /// headers into code sections where new #include's should not be added by + /// default. These code sections include: + /// - raw string literals (containing #include). + /// - #if blocks. + /// - Special #include's among declarations (e.g. functions). + /// + /// Returns a replacement that inserts the new header into a suitable #include + /// block (#include order is preserved with best effort e.g. if they are + /// already sorted; caller should consider sorting the #includes with + /// clang-format after insertions). If \p IncludeName already exists, this + /// returns None. + llvm::Optional insert(llvm::StringRef IncludeName) const; + + /// Removes all existing #includes of \p Header. If \p Header is not quoted + /// (e.g. without <> or ""), #include of \p Header with both quotations will + /// be removed. + tooling::Replacements remove(llvm::StringRef Header) const; -// FIXME: insert empty lines between newly created blocks. -tooling::Replacements -fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, - const FormatStyle &Style) { - if (!Style.isCpp()) - return Replaces; +private: + struct Include { + Include(StringRef Name, tooling::Range R) : Name(Name), R(R) {} + + // An include header quoted with either <> or "". + std::string Name; + // The range of the whole line of include directive including any eading + // whitespaces and trailing comment. + tooling::Range R; + }; - tooling::Replacements HeaderInsertions; - std::set HeadersToDelete; - tooling::Replacements Result; - for (const auto &R : Replaces) { - if (isHeaderInsertion(R)) { - // Replacements from \p Replaces must be conflict-free already, so we can - // simply consume the error. - llvm::consumeError(HeaderInsertions.add(R)); - } else if (isHeaderDeletion(R)) { - HeadersToDelete.insert(R.getReplacementText()); - } else if (R.getOffset() == UINT_MAX) { - llvm::errs() << "Insertions other than header #include insertion are " - "not supported! " - << R.getReplacementText() << "\n"; - } else { - llvm::consumeError(Result.add(R)); - } - } - if (HeaderInsertions.empty() && HeadersToDelete.empty()) - return Replaces; + void addExistingInclude(Include IncludeToAdd, unsigned NextLineOffset); - llvm::Regex IncludeRegex(IncludeRegexPattern); - llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)"); - SmallVector Matches; + StringRef FileName; + StringRef Code; - StringRef FileName = Replaces.begin()->getFilePath(); - IncludeCategoryManager Categories(Style, FileName); + // Map from include name (quotation trimmed) to a list of existing includes (in case + // there are more than one) with the name in the current file. + // and "x" will be treated as the same header when deleting #includes. + llvm::StringMap> ExistingIncludes; + + std::unordered_map> + IncludesByPriority; + int FirstIncludeOffset; + // All new headers should be inserted after this offset. + unsigned MinInsertOffset; + // Code with the first MinInsertOffset characters trimmed. + StringRef TrimmedCode; + // Max insertion offset in the original code. + unsigned MaxInsertOffset; + IncludeCategoryManager Categories; // Record the offset of the end of the last include in each category. - std::map CategoryEndOffsets; + std::unordered_map CategoryEndOffsets; + // All possible priorities. - // Add 0 for main header and INT_MAX for headers that are not in any category. - std::set Priorities = {0, INT_MAX}; + std::set Priorities; + + // Matches a whole #include directive. + llvm::Regex IncludeRegex; +}; + +HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, + const FormatStyle &Style) + : FileName(FileName), Code(Code), FirstIncludeOffset(-1), + MinInsertOffset( + getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), + TrimmedCode(Code.drop_front(MinInsertOffset)), + MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset( + FileName, TrimmedCode, Style)), + Categories(Style, FileName), + IncludeRegex(llvm::Regex(IncludeRegexPattern)) { + // Add 0 for main header and INT_MAX for headers that are not in any + // category. + Priorities = {0, INT_MAX}; for (const auto &Category : Style.IncludeCategories) Priorities.insert(Category.Priority); - int FirstIncludeOffset = -1; - // All new headers should be inserted after this offset. - unsigned MinInsertOffset = - getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); - StringRef TrimmedCode = Code.drop_front(MinInsertOffset); - // Max insertion offset in the original code. - unsigned MaxInsertOffset = - MinInsertOffset + - getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style); SmallVector Lines; TrimmedCode.split(Lines, '\n'); + unsigned Offset = MinInsertOffset; unsigned NextLineOffset; - std::set ExistingIncludes; + SmallVector Matches; for (auto Line : Lines) { NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); if (IncludeRegex.match(Line, &Matches)) { - // The header name with quotes or angle brackets. - StringRef IncludeName = Matches[2]; - ExistingIncludes.insert(IncludeName); - // Only record the offset of current #include if we can insert after it. - if (Offset <= MaxInsertOffset) { - int Category = Categories.getIncludePriority( - IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = NextLineOffset; - if (FirstIncludeOffset < 0) - FirstIncludeOffset = Offset; - } - if (isDeletedHeader(IncludeName, HeadersToDelete)) { - // If this is the last line without trailing newline, we need to make - // sure we don't delete across the file boundary. - unsigned Length = std::min(Line.size() + 1, Code.size() - Offset); - llvm::Error Err = - Result.add(tooling::Replacement(FileName, Offset, Length, "")); - if (Err) { - // Ignore the deletion on conflict. - llvm::errs() << "Failed to add header deletion replacement for " - << IncludeName << ": " << llvm::toString(std::move(Err)) - << "\n"; - } - } + // If this is the last line without trailing newline, we need to make + // sure we don't delete across the file boundary. + addExistingInclude( + Include(Matches[2], + tooling::Range( + Offset, std::min(Line.size() + 1, Code.size() - Offset))), + NextLineOffset); } Offset = NextLineOffset; } // Populate CategoryEndOfssets: // - Ensure that CategoryEndOffset[Highest] is always populated. - // - If CategoryEndOffset[Priority] isn't set, use the next higher value that - // is set, up to CategoryEndOffset[Highest]. + // - If CategoryEndOffset[Priority] isn't set, use the next higher value + // that is set, up to CategoryEndOffset[Highest]. auto Highest = Priorities.begin(); if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { if (FirstIncludeOffset >= 0) @@ -2095,8 +2110,139 @@ for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; +} + +inline StringRef trimInclude(StringRef IncludeName) { + return IncludeName.trim("\"<>"); +} - bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n'; +// \p Offset: the start of the line following this include directive. +void HeaderIncludes::addExistingInclude(Include IncludeToAdd, + unsigned NextLineOffset) { + auto Iter = + ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first; + Iter->second.push_back(std::move(IncludeToAdd)); + auto &CurInclude = Iter->second.back(); + // The header name with quotes or angle brackets. + // Only record the offset of current #include if we can insert after it. + if (CurInclude.R.getOffset() <= MaxInsertOffset) { + int Priority = Categories.getIncludePriority( + CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0); + CategoryEndOffsets[Priority] = NextLineOffset; + IncludesByPriority[Priority].push_back(&CurInclude); + if (FirstIncludeOffset < 0) + FirstIncludeOffset = CurInclude.R.getOffset(); + } +} + +llvm::Optional +HeaderIncludes::insert(llvm::StringRef IncludeName) const { + std::string TrimmedName = trimInclude(IncludeName); + // If a
("header") already exists in code, "header" (
) with + // different quotation will not be inserted. + if (ExistingIncludes.find(TrimmedName) != ExistingIncludes.end()) + return llvm::None; + std::string Quoted = + (IncludeName.startswith("<") || IncludeName.startswith("\"")) + ? IncludeName.str() + : ("\"" + IncludeName + "\"").str(); + StringRef QuotedName = Quoted; + int Priority = Categories.getIncludePriority( + QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0); + auto CatOffset = CategoryEndOffsets.find(Priority); + assert(CatOffset != CategoryEndOffsets.end()); + unsigned InsertOffset = CatOffset->second; // Fall back offset + auto Iter = IncludesByPriority.find(Priority); + if (Iter != IncludesByPriority.end()) { + for (const auto *Inc : Iter->second) { + if (QuotedName < Inc->Name) { + InsertOffset = Inc->R.getOffset(); + break; + } + } + } + assert(InsertOffset <= Code.size()); + std::string NewInclude = ("#include " + QuotedName + "\n").str(); + // When inserting headers at end of the code, also append '\n' to the code + // if it does not end with '\n'. + // FIXME: when inserting multiple #includes at the end of code, only one + // newline should be added. + if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) + NewInclude = "\n" + NewInclude; + return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); +} + +tooling::Replacements +HeaderIncludes::remove(llvm::StringRef IncludeName) const { + StringRef TrimmedName = trimInclude(IncludeName); + tooling::Replacements Result; + auto Iter = ExistingIncludes.find(TrimmedName); + if (Iter == ExistingIncludes.end()) + return Result; + for (const auto &Inc : Iter->second) { + assert(trimInclude(Inc.Name) == TrimmedName); + // If IncludeName is not quoted, then both quotations are deleted. + if (IncludeName != TrimmedName && + IncludeName != Inc.Name) // Different quotations, skip. + continue; + llvm::Error Err = Result.add(tooling::Replacement( + FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); + if (Err) { + auto ErrMsg = "Unexpected conflicts in #include deletions: " + + llvm::toString(std::move(Err)); + llvm_unreachable(ErrMsg.c_str()); + } + } + return Result; +} + +// FIXME: insert empty lines between newly created blocks. +tooling::Replacements +fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, + const FormatStyle &Style) { + if (!Style.isCpp()) + return Replaces; + + tooling::Replacements HeaderInsertions; + std::set HeadersToDelete; + tooling::Replacements Result; + for (const auto &R : Replaces) { + if (isHeaderInsertion(R)) { + // Replacements from \p Replaces must be conflict-free already, so we can + // simply consume the error. + llvm::consumeError(HeaderInsertions.add(R)); + } else if (isHeaderDeletion(R)) { + HeadersToDelete.insert(R.getReplacementText()); + } else if (R.getOffset() == UINT_MAX) { + llvm::errs() << "Insertions other than header #include insertion are " + "not supported! " + << R.getReplacementText() << "\n"; + } else { + llvm::consumeError(Result.add(R)); + } + } + if (HeaderInsertions.empty() && HeadersToDelete.empty()) + return Replaces; + + + StringRef FileName = Replaces.begin()->getFilePath(); + HeaderIncludes Includes(FileName, Code, Style); + + for (const auto &Header : HeadersToDelete) { + tooling::Replacements Replaces = Includes.remove(Header); + for (const auto &R : Replaces) { + auto Err = Result.add(R); + if (Err) { + // Ignore the deletion on conflict. + llvm::errs() << "Failed to add header deletion replacement for " + << Header << ": " << llvm::toString(std::move(Err)) + << "\n"; + } + } + } + + llvm::Regex IncludeRegex = llvm::Regex(IncludeRegexPattern); + llvm::SmallVector Matches; for (const auto &R : HeaderInsertions) { auto IncludeDirective = R.getReplacementText(); bool Matched = IncludeRegex.match(IncludeDirective, &Matches); @@ -2104,30 +2250,16 @@ "'#include ...'"); (void)Matched; auto IncludeName = Matches[2]; - if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) { - DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName - << "\n"); - continue; - } - int Category = - Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true); - Offset = CategoryEndOffsets[Category]; - std::string NewInclude = !IncludeDirective.endswith("\n") - ? (IncludeDirective + "\n").str() - : IncludeDirective.str(); - // When inserting headers at end of the code, also append '\n' to the code - // if it does not end with '\n'. - if (NeedNewLineAtEnd && Offset == Code.size()) { - NewInclude = "\n" + NewInclude; - NeedNewLineAtEnd = false; - } - auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude); - auto Err = Result.add(NewReplace); - if (Err) { - llvm::consumeError(std::move(Err)); - unsigned NewOffset = Result.getShiftedCodePosition(Offset); - NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude); - Result = Result.merge(tooling::Replacements(NewReplace)); + auto Replace = Includes.insert(IncludeName); + if (Replace) { + auto Err = Result.add(*Replace); + if (Err) { + llvm::consumeError(std::move(Err)); + unsigned NewOffset = Result.getShiftedCodePosition(Replace->getOffset()); + auto Shifted = tooling::Replacement(FileName, NewOffset, 0, + Replace->getReplacementText()); + Result = Result.merge(tooling::Replacements(Shifted)); + } } } return Result; Index: unittests/Format/CleanupTest.cpp =================================================================== --- unittests/Format/CleanupTest.cpp +++ unittests/Format/CleanupTest.cpp @@ -471,19 +471,77 @@ EXPECT_EQ(Expected, apply(Code, Replaces)); } +TEST_F(CleanUpReplacementsTest, InsertIntoBlockSorted) { + std::string Code = "#include \"x/fix.h\"\n" + "#include \"a.h\"\n" + "#include \"c.h\"\n" + "#include \n"; + std::string Expected = "#include \"x/fix.h\"\n" + "#include \"a.h\"\n" + "#include \"b.h\"\n" + "#include \"c.h\"\n" + "#include \n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include \"b.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, InsertIntoFirstBlockOfSameKind) { + std::string Code = "#include \"x/fix.h\"\n" + "#include \"c.h\"\n" + "#include \"e.h\"\n" + "#include \"f.h\"\n" + "#include \n" + "#include \n" + "#include \"m.h\"\n" + "#include \"n.h\"\n"; + std::string Expected = "#include \"x/fix.h\"\n" + "#include \"c.h\"\n" + "#include \"d.h\"\n" + "#include \"e.h\"\n" + "#include \"f.h\"\n" + "#include \n" + "#include \n" + "#include \"m.h\"\n" + "#include \"n.h\"\n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include \"d.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, InsertIntoSystemBlockSorted) { + std::string Code = "#include \"x/fix.h\"\n" + "#include \"a.h\"\n" + "#include \"c.h\"\n" + "#include \n" + "#include \n"; + std::string Expected = "#include \"x/fix.h\"\n" + "#include \"a.h\"\n" + "#include \"c.h\"\n" + "#include \n" + "#include \n" + "#include \n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include ")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + + TEST_F(CleanUpReplacementsTest, InsertMultipleIncludesLLVMStyle) { std::string Code = "#include \"x/fix.h\"\n" "#include \"a.h\"\n" "#include \"b.h\"\n" + "#include \"z.h\"\n" "#include \"clang/Format/Format.h\"\n" "#include \n"; std::string Expected = "#include \"x/fix.h\"\n" "#include \"a.h\"\n" "#include \"b.h\"\n" "#include \"new/new.h\"\n" + "#include \"z.h\"\n" "#include \"clang/Format/Format.h\"\n" - "#include \n" - "#include \n"; + "#include \n" + "#include \n"; tooling::Replacements Replaces = toReplacements({createInsertion("#include "), createInsertion("#include \"new/new.h\"")}); @@ -517,12 +575,12 @@ "#include \"z/b.h\"\n"; std::string Expected = "#include \"x/fix.h\"\n" "\n" - "#include \n" "#include \n" + "#include \n" "\n" + "#include \"x/x.h\"\n" "#include \"y/a.h\"\n" - "#include \"z/b.h\"\n" - "#include \"x/x.h\"\n"; + "#include \"z/b.h\"\n"; tooling::Replacements Replaces = toReplacements({createInsertion("#include "), createInsertion("#include \"x/x.h\"")}); @@ -776,8 +834,10 @@ TEST_F(CleanUpReplacementsTest, NoNewLineAtTheEndOfCodeMultipleInsertions) { std::string Code = "#include "; + // FIXME: a better behavior is to only append on newline to Code, but this + // case should be rare in practice. std::string Expected = - "#include \n#include \n#include \n"; + "#include \n#include \n\n#include \n"; tooling::Replacements Replaces = toReplacements({createInsertion("#include "), createInsertion("#include ")}); @@ -795,14 +855,11 @@ EXPECT_EQ(Expected, apply(Code, Replaces)); } -TEST_F(CleanUpReplacementsTest, AddIncludesWithDifferentForms) { +TEST_F(CleanUpReplacementsTest, NoAddSameIncludesWithDifferentForms) { std::string Code = "#include \"a.h\"\n" "#include \n"; - // FIXME: this might not be the best behavior. std::string Expected = "#include \"a.h\"\n" - "#include \"vector\"\n" - "#include \n" - "#include \n"; + "#include \n"; tooling::Replacements Replaces = toReplacements({createInsertion("#include \"vector\""), createInsertion("#include ")});