diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -12,6 +12,7 @@ #include "Path.h" #include "Protocol.h" #include "SourceCode.h" +#include "clang/Basic/LangOptions.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Error.h" @@ -55,6 +56,32 @@ std::vector Occurrences, llvm::StringRef NewName); +/// Adjusts indexed occurrences to match the current state of the file. +/// +/// The Index is not always up to date. Blindly editing at the locations +/// reported by the index may mangle the code in such cases. +/// This function determines whether the indexed occurrences can be applied to +/// this file, and heuristically repairs the occurrences if necessary. +/// +/// The API assumes that Indexed contains only named occurrences (each +/// occurrence has the same length). +llvm::Expected> +adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier, + std::vector Indexed, const LangOptions &LangOpts); + +/// Calculates the lexed occurrences that the given indexed occurrences map to. +/// Returns an error if we don't find a mapping. +/// +/// Exposed for testing only. +/// +/// REQUIRED: Indexed and Lexed are sorted. +llvm::Expected> getMappedRanges(ArrayRef Indexed, + ArrayRef Lexed); +/// Evaluates how good the mapped result is. 0 indicates a perfect match. +/// REQUIRED: Indexed and Mapped are sorted, and have the same size. +size_t renameRangeAdjustmentCost(ArrayRef Indexed, + ArrayRef Mapped); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include namespace clang { namespace clangd { @@ -355,9 +356,19 @@ elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } - auto RenameEdit = - buildRenameEdit(FilePath, *AffectedFileCode, - std::move(FileAndOccurrences.second), NewName); + auto RenameCandidates = + adjustRenameRanges(*AffectedFileCode, RenameDecl.getNameAsString(), + std::move(FileAndOccurrences.second), + RenameDecl.getASTContext().getLangOpts()); + if (!RenameCandidates) { + return llvm::make_error( + llvm::formatv("Index results don't match the content of file {0} " + "(the index may be stale), details: {0}", + FilePath, llvm::toString(RenameCandidates.takeError())), + llvm::inconvertibleErrorCode()); + } + auto RenameEdit = buildRenameEdit(FilePath, *AffectedFileCode, + *RenameCandidates, NewName); if (!RenameEdit) { return llvm::make_error( llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, @@ -370,6 +381,44 @@ return Results; } +// A simple edit is eithor changing line or column, but not both. +bool impliesSimpleEdit(const Position &LHS, const Position &RHS) { + return LHS.line == RHS.line || LHS.character == RHS.character; +} + +// Performs a DFS to enumerate all possible near-miss matches. +// It finds the locations where the indexed occurrences are now spelled in +// Lexed occurrences, a near miss is defined as: +// - a near miss maps all of the **name** occurrences from the index onto a +// *subset* of lexed occurrences (we allow a single name refers to more +// than one symbol) +// - all indexed occurrences must be mapped, and Result must be distinct and +// preseve order (only support detecting simple edits to ensure a +// robust mapping) +// - each indexed -> lexed occurrences mapping correspondence may change the +// *line* or *column*, but not both (increases chance of a robust mapping) +void findNearMiss( + std::vector &PartialMatch, ArrayRef IndexedRest, + ArrayRef LexedRest, int LexedIndex, int &Fuel, + llvm::function_ref &)> MatchedCB) { + if (--Fuel < 0) + return; + if (IndexedRest.size() > LexedRest.size()) + return; + if (IndexedRest.empty()) { + MatchedCB(PartialMatch); + return; + } + if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) { + PartialMatch.push_back(LexedIndex); + findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(), + LexedIndex + 1, Fuel, MatchedCB); + PartialMatch.pop_back(); + } + findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(), + LexedIndex + 1, Fuel, MatchedCB); +} + } // namespace llvm::Expected rename(const RenameInputs &RInputs) { @@ -504,5 +553,111 @@ return Edit(InitialCode, std::move(RenameEdit)); } +// Details: +// - lex the draft code to get all rename candidates, this yields a superset +// of candidates. +// - apply range patching heuristics to generate "authoritative" occurrences, +// cases we consider: +// (a) index returns a subset of candidates, we use the indexed results. +// - fully equal, we are sure the index is up-to-date +// - proper subset, index is correct in most cases? there may be false +// positives (e.g. candidates got appended), but rename is still safe +// (b) index returns non-candidate results, we attempt to map the indexed +// ranges onto candidates in a plausible way (e.g. guess that lines +// were inserted). If such a "near miss" is found, the rename is still +// possible +llvm::Expected> +adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier, + std::vector Indexed, const LangOptions &LangOpts) { + assert(!Indexed.empty()); + std::vector Lexed = + collectIdentifierRanges(Identifier, DraftCode, LangOpts); + llvm::sort(Indexed); + llvm::sort(Lexed); + return getMappedRanges(Indexed, Lexed); +} + +llvm::Expected> getMappedRanges(ArrayRef Indexed, + ArrayRef Lexed) { + assert(!Indexed.empty()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Lexed.begin(), Lexed.end())); + + if (Indexed.size() > Lexed.size()) + return llvm::make_error( + llvm::inconvertibleErrorCode(), + llvm::formatv("The number of lexed occurrences is less than indexed " + "occurrences")); + // Fast check for the special subset case. + if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end())) + return std::vector{Indexed.begin(), Indexed.end()}; + + std::vector Best; + size_t BestCost = std::numeric_limits::max(); + bool HasMultiple = 0; + std::vector MatchedLexedIndex; + int Fuel = 10000; + findNearMiss(MatchedLexedIndex, Indexed, Lexed, 0, Fuel, + [&](const std::vector &Matched) { + std::vector Mapped; + for (int I : MatchedLexedIndex) + Mapped.push_back(Lexed[I]); + size_t MCost = renameRangeAdjustmentCost(Indexed, Mapped); + if (MCost < BestCost) { + BestCost = MCost; + Best = std::move(Mapped); + HasMultiple = false; // reset + return; + } + if (MCost == BestCost) + HasMultiple = true; + }); + if (HasMultiple) + return llvm::make_error( + llvm::inconvertibleErrorCode(), + llvm::formatv("The best near miss is not distinct")); + if (Best.empty()) + return llvm::make_error( + llvm::inconvertibleErrorCode(), + llvm::formatv("Didn't found a near miss")); + return Best; +} + +/// The cost is the sum of the implied edit sizes between successive diffs, only +/// simple edits are considered: +/// - insert/remove a line (change line offset) +/// - insert/remove a character on an existing line (change the column offset) +/// +/// Example I, total result is 1 + 1 = 2. +/// diff[0]: line + 1 <- insert a line before edit 0. +/// diff[1]: line + 1 +/// diff[2]: line + 1 +/// diff[3]: line + 2 <- insert a line before edits 2 and 3. +/// +/// Example II, total result is 1 + 1 + 1 = 3. +/// diff[0]: line + 1 <- insert a line before edit 0. +/// diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a +/// character on edit 1. +size_t renameRangeAdjustmentCost(ArrayRef Indexed, + ArrayRef Mapped) { + assert(Indexed.size() == Mapped.size()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Mapped.begin(), Mapped.end())); + + int LastLine = -1; + int LastDLine = 0, LastDColumn = 0; + int Cost = 0; + for (size_t I = 0; I < Indexed.size(); ++I) { + int DLine = Indexed[I].start.line - Mapped[I].start.line; + int DColumn = Indexed[I].start.character - Mapped[I].start.character; + int Line = Indexed[I].start.line; + if (Line != LastLine) + LastDColumn = 0; // colmun offsets don't carry cross lines. + Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn); + std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn); + } + return Cost; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -14,6 +14,7 @@ #include "index/Ref.h" #include "refactor/Rename.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -853,6 +854,310 @@ expectedResult(Code, expectedResult(T, "abc"))); } +TEST(CrossFileRenameTests, adjustRenameRanges) { + // Ranges in IndexedCode indicate the indexed occurrences; + // ranges in DraftCode indicate the expected mapped result, empty indicates + // we expect no matched result found. + struct { + llvm::StringRef IndexedCode; + llvm::StringRef DraftCode; + } Tests[] = { + { + // both line and column are changed, not a near miss. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + + int x = 0; + )cpp", + }, + { + // subset. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + int [[x]] = 0; + {int x = 0; } + )cpp", + }, + { + // column shift + R"cpp(int [[x]] = 0; void foo(int x);)cpp", + R"cpp(double [[x]] = 0; void foo(double x);)cpp", + }, + { + // insert a line + R"cpp( + int [[x]] = 0; + void foo(int x); + )cpp", + R"cpp( + // insert a line. + int [[x]] = 0; + void foo(int x); + )cpp", + }, + }; + LangOptions LangOpts; + LangOpts.CPlusPlus = true; + for (const auto &T : Tests) { + auto IndexedOccurrences = Annotations(T.IndexedCode).ranges(); + Annotations Draft(T.DraftCode); + auto ActualRanges = + adjustRenameRanges(Draft.code(), "x", IndexedOccurrences, LangOpts); + if (!Draft.ranges().empty()) { + EXPECT_TRUE((bool)ActualRanges) << "patchRange returned an error: " + << llvm::toString(ActualRanges.takeError()); + EXPECT_THAT(Draft.ranges(), + testing::UnorderedElementsAreArray(*ActualRanges)) + << T.DraftCode; + } else { + EXPECT_FALSE(ActualRanges) + << "expected an error: " << T.DraftCode; + llvm::consumeError(ActualRanges.takeError()); + } + } +} + +TEST(RangePatchingHeuristic, GetMappedRanges) { + // ^ in LexedCode marks the ranges we expect to be mapped; no ^ indicates + // there are no mapped ranges. + struct { + llvm::StringRef IndexedCode; + llvm::StringRef LexedCode; + } Tests[] = { + { + // no lexed ranges. + "[[]]", + "", + }, + { + // both line and column are changed, not a near miss. + R"([[]])", + R"( + [[]] + )", + }, + { + // subset. + "[[]]", + "^[[]] [[]]" + }, + { + // column shift + "[[]] [[]]", + " ^[[]] ^[[]] [[]]" + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + ^[[]] + + ^[[]] ^[[]] + )", + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + ^[[]] + ^[[]] ^[[]] // column is changed. + )", + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + [[]] + + [[]] [[]] // not mapped (both line and column are changed). + )", + }, + { + R"( + [[]] + [[]] + + [[]] + [[]] + + } + )", + R"( + // insert a new line + ^[[]] + ^[[]] + [[]] // additional range + ^[[]] + ^[[]] + [[]] // additional range + )", + }, + { + // non-distinct result (have two best results), not a near miss + R"( + [[]] + [[]] + [[]] + )", + R"( + [[]] + [[]] + [[]] + [[]] + )", + } + }; + for (const auto &T : Tests) { + auto IndexedRanges = Annotations(T.IndexedCode).ranges(); + auto Lexed = Annotations(T.LexedCode); + auto LexedRanges = Lexed.ranges(); + std::vector ExpectedMatches; + for (auto P : Lexed.points()) { + auto Match = llvm::find_if(LexedRanges, [&P](const Range& R) { + return R.start == P; + }); + ASSERT_TRUE(Match != LexedRanges.end()); + ExpectedMatches.push_back(*Match); + } + + auto Mapped = getMappedRanges(IndexedRanges, LexedRanges); + if (!ExpectedMatches.empty()) { + EXPECT_TRUE((bool)Mapped) << "getMappedRanges returned an error: " + << llvm::toString(Mapped.takeError()); + EXPECT_THAT(ExpectedMatches, testing::UnorderedElementsAreArray(*Mapped)) + << T.IndexedCode; + } else { + EXPECT_FALSE(Mapped) << "expected an error: " << T.IndexedCode; + llvm::consumeError(Mapped.takeError()); + } + } +} + +TEST(CrossFileRenameTests, adjustmentCost) { + struct { + llvm::StringRef RangeCode; + size_t ExpectedCost; + } Tests[] = { + { + R"( + $idx[[]]$lex[[]] // diff: 0 + )", + 0, + }, + { + // line offset shift. + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + + $idx[[]] + + $lex[[]] // line diff: +2 + )", + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 1 + 1 + }, + { + R"( + $idx[[]] + + + $lex[[]] // line diff: +3 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + $lex[[]] // line diff: +1 + )", + 3 + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $lex[[]] // line diff: -2 + + $idx[[]] + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 3 + 5 + }, + // line & column diff. + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // diff: +1 + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + 1 + 1 + }, + // column offset shift. + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + )", + 1 + }, + { + R"( + // column diffs: +1, +2, +3 + $idx[[]] $lex[[]] $idx[[]] $lex[[]] $idx[[]] $lex[[]] + )", + 1 + 1 + 1, + }, + }; + for (const auto &T : Tests) { + Annotations C(T.RangeCode); + EXPECT_EQ(renameRangeAdjustmentCost(C.ranges("idx"), C.ranges("lex")), + T.ExpectedCost) + << T.RangeCode; + } +} + } // namespace } // namespace clangd } // namespace clang