diff --git a/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h b/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h --- a/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h +++ b/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h @@ -129,6 +129,23 @@ llvm::Regex IncludeRegex; }; +/// \returns a regex that can match various styles of C++ includes. +/// For example: +/// \code +/// #include +/// @import bar; +/// #include "bar.h" +/// \endcode +llvm::Regex getCppIncludeRegex(); + +/// \returns the last match in the list of matches that is not empty. +llvm::StringRef getIncludeNameFromMatches( + const llvm::SmallVectorImpl &Matches); + +/// \returns the given include name and removes the following symbols from the +/// beginning and ending of the include name: " > < ; +llvm::StringRef trimInclude(llvm::StringRef IncludeName); + } // namespace tooling } // namespace clang diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/YAMLTraits.h" #include +#include #include #include #include @@ -2721,13 +2722,6 @@ } } -namespace { - -const char CppIncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; - -} // anonymous namespace - tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName, @@ -2737,7 +2731,7 @@ .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM .Default(0); unsigned SearchFrom = 0; - llvm::Regex IncludeRegex(CppIncludeRegexPattern); + llvm::Regex IncludeRegex(tooling::getCppIncludeRegex()); SmallVector Matches; SmallVector IncludesInBlock; @@ -2793,7 +2787,14 @@ bool MergeWithNextLine = Trimmed.endswith("\\"); if (!FormattingOff && !MergeWithNextLine) { if (IncludeRegex.match(Line, &Matches)) { - StringRef IncludeName = Matches[2]; + StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches); + // This addresses https://github.com/llvm/llvm-project/issues/38995 + bool WithSemicolon = false; + if (!IncludeName.startswith("\"") && !IncludeName.startswith("<") && + IncludeName.endswith(";")) { + WithSemicolon = true; + } + if (Line.contains("/*") && !Line.contains("*/")) { // #include with a start of a block comment, but without the end. // Need to keep all the lines until the end of the comment together. @@ -2806,8 +2807,10 @@ int Category = Categories.getIncludePriority( IncludeName, /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock); - int Priority = Categories.getSortIncludePriority( - IncludeName, !MainIncludeFound && FirstIncludeBlock); + int Priority = WithSemicolon ? std::numeric_limits::max() + : Categories.getSortIncludePriority( + IncludeName, !MainIncludeFound && + FirstIncludeBlock); if (Category == 0) MainIncludeFound = true; IncludesInBlock.push_back( @@ -3067,8 +3070,7 @@ inline bool isHeaderInsertion(const tooling::Replacement &Replace) { return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 && - llvm::Regex(CppIncludeRegexPattern) - .match(Replace.getReplacementText()); + tooling::getCppIncludeRegex().match(Replace.getReplacementText()); } inline bool isHeaderDeletion(const tooling::Replacement &Replace) { @@ -3076,7 +3078,7 @@ } // FIXME: insert empty lines between newly created blocks. -tooling::Replacements +static tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, const FormatStyle &Style) { if (!Style.isCpp()) @@ -3108,7 +3110,7 @@ for (const auto &Header : HeadersToDelete) { tooling::Replacements Replaces = - Includes.remove(Header.trim("\"<>"), Header.startswith("<")); + Includes.remove(tooling::trimInclude(Header), Header.startswith("<")); for (const auto &R : Replaces) { auto Err = Result.add(R); if (Err) { @@ -3120,7 +3122,7 @@ } } - llvm::Regex IncludeRegex = llvm::Regex(CppIncludeRegexPattern); + llvm::Regex IncludeRegex = tooling::getCppIncludeRegex(); llvm::SmallVector Matches; for (const auto &R : HeaderInsertions) { auto IncludeDirective = R.getReplacementText(); @@ -3128,9 +3130,9 @@ assert(Matched && "Header insertion replacement must have replacement text " "'#include ...'"); (void)Matched; - auto IncludeName = Matches[2]; - auto Replace = - Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<")); + StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches); + auto Replace = Includes.insert(tooling::trimInclude(IncludeName), + IncludeName.startswith("<")); if (Replace) { auto Err = Result.add(*Replace); if (Err) { diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -169,13 +169,6 @@ }); } -inline StringRef trimInclude(StringRef IncludeName) { - return IncludeName.trim("\"<>"); -} - -const char IncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; - // The filename of Path excluding extension. // Used to match implementation with headers, this differs from sys::path::stem: // - in names with multiple dots (foo.cu.cc) it terminates at the *first* @@ -274,8 +267,7 @@ MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset( FileName, Code.drop_front(MinInsertOffset), Style)), - Categories(Style, FileName), - IncludeRegex(llvm::Regex(IncludeRegexPattern)) { + Categories(Style, FileName), IncludeRegex(getCppIncludeRegex()) { // Add 0 for main header and INT_MAX for headers that are not in any // category. Priorities = {0, INT_MAX}; @@ -290,10 +282,11 @@ for (auto Line : Lines) { NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); if (IncludeRegex.match(Line, &Matches)) { + StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches); // If this is the last line without trailing newline, we need to make // sure we don't delete across the file boundary. addExistingInclude( - Include(Matches[2], + Include(IncludeName, tooling::Range( Offset, std::min(Line.size() + 1, Code.size() - Offset))), NextLineOffset); @@ -403,5 +396,25 @@ return Result; } +llvm::Regex getCppIncludeRegex() { + static const char CppIncludeRegexPattern[] = + R"(^[\t\ ]*[@#][\t\ ]*(import|include)([^"]*("[^"]+")|[^<]*(<[^>]+>)|[\t\ ]*([^;]+;)))"; + return llvm::Regex(CppIncludeRegexPattern); +} + +llvm::StringRef getIncludeNameFromMatches( + const llvm::SmallVectorImpl &Matches) { + for (auto Match : llvm::reverse(Matches)) { + if (!Match.empty()) + return Match; + } + llvm_unreachable("No non-empty match group found in list of matches"); + return llvm::StringRef(); +} + +llvm::StringRef trimInclude(llvm::StringRef IncludeName) { + return IncludeName.trim("\"<>;"); +} + } // namespace tooling } // namespace clang diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp --- a/clang/unittests/Format/SortIncludesTest.cpp +++ b/clang/unittests/Format/SortIncludesTest.cpp @@ -458,6 +458,103 @@ "#include \"b.h\"\n")); } +TEST_F(SortIncludesTest, SupportAtImportLines) { + // Test from https://github.com/llvm/llvm-project/issues/38995 + EXPECT_EQ("#import \"a.h\"\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import \n" + "@import Foundation;\n", + sort("#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import \n" + "@import Foundation;\n" + "#import \"a.h\"\n")); + + // Slightly more complicated test that shows sorting in each priorities still + // works. + EXPECT_EQ("#import \"a.h\"\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import \n" + "@import Base;\n" + "@import Foundation;\n" + "@import base;\n" + "@import foundation;\n", + sort("#import \"b.h\"\n" + "#import \"c.h\"\n" + "@import Base;\n" + "#import \n" + "@import foundation;\n" + "@import Foundation;\n" + "@import base;\n" + "#import \"a.h\"\n")); + + // Test that shows main headers in two groups are still found and sorting + // still works. The @import's are kept in their respective group but are + // put at the end of each group. + FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; + EXPECT_EQ("#import \"foo.hpp\"\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import \n" + "@import Base;\n" + "@import Foundation;\n" + "@import foundation;\n" + "\n" + "#import \"foo.h\"\n" + "#include \"foobar\"\n" + "#import \n" + "@import AAAA;\n" + "@import aaaa;\n", + sort("#import \"b.h\"\n" + "@import Foundation;\n" + "@import foundation;\n" + "#import \"c.h\"\n" + "#import \n" + "@import Base;\n" + "#import \"foo.hpp\"\n" + "\n" + "@import aaaa;\n" + "#import \n" + "@import AAAA;\n" + "#include \"foobar\"\n" + "#import \"foo.h\"\n", + "foo.c", 2)); + + // Regrouping and putting @import's in the very last group + FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup; + EXPECT_EQ("#import \"foo.hpp\"\n" + "\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import \"foo.h\"\n" + "#include \"foobar\"\n" + "\n" + "#import \n" + "#import \n" + "\n" + "@import AAAA;\n" + "@import Base;\n" + "@import Foundation;\n" + "@import aaaa;\n" + "@import foundation;\n", + sort("#import \"b.h\"\n" + "@import Foundation;\n" + "@import foundation;\n" + "#import \"c.h\"\n" + "#import \n" + "@import Base;\n" + "#import \"foo.hpp\"\n" + "\n" + "@import aaaa;\n" + "#import \n" + "@import AAAA;\n" + "#include \"foobar\"\n" + "#import \"foo.h\"\n", + "foo.c")); +} + TEST_F(SortIncludesTest, LeavesMainHeaderFirst) { Style.IncludeIsMainRegex = "([-_](test|unittest))?$"; EXPECT_EQ("#include \"llvm/a.h\"\n"