diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -12,6 +12,7 @@ #include "Path.h" #include "Protocol.h" #include "SourceCode.h" +#include "clang/Basic/LangOptions.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Error.h" @@ -55,6 +56,36 @@ std::vector Occurrences, llvm::StringRef NewName); +/// Adjusts indexed occurrences to match the current state of the file. +/// +/// The Index is not always up to date. Blindly editing at the locations +/// reported by the index may mangle the code in such cases. +/// This function determines whether the indexed occurrences can be applied to +/// this file, and heuristically repairs the occurrences if necessary. +/// +/// The API assumes that Indexed contains only named occurrences (each +/// occurrence has the same length). +llvm::Optional> +adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier, + std::vector Indexed, const LangOptions &LangOpts); + +/// Calculates the lexed occurrences that the given indexed occurrences map to. +/// Returns None if we don't find a mapping. +/// +/// Exposed for testing only. +/// +/// REQUIRED: Indexed and Lexed are sorted. +llvm::Optional> getMappedRanges(ArrayRef Indexed, + ArrayRef Lexed); +/// Evaluates how good the mapped result is. 0 indicates a perfect match. +/// +/// Exposed for testing only. +/// +/// REQUIRED: Indexed and Lexed are sorted, Indexed and MappedIndex have the +/// same size. +size_t renameRangeAdjustmentCost(ArrayRef Indexed, ArrayRef Lexed, + ArrayRef MappedIndex); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -18,9 +18,11 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include namespace clang { namespace clangd { @@ -355,9 +357,22 @@ elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } + auto RenameRanges = + adjustRenameRanges(*AffectedFileCode, RenameDecl.getNameAsString(), + std::move(FileAndOccurrences.second), + RenameDecl.getASTContext().getLangOpts()); + if (!RenameRanges) { + // Our heuristice fails to adjust rename ranges to the current state of + // the file, it is most likely the index is stale, so we give up the + // entire rename. + return llvm::make_error( + llvm::formatv("Index results don't match the content of file {0} " + "(the index may be stale)", + FilePath), + llvm::inconvertibleErrorCode()); + } auto RenameEdit = - buildRenameEdit(FilePath, *AffectedFileCode, - std::move(FileAndOccurrences.second), NewName); + buildRenameEdit(FilePath, *AffectedFileCode, *RenameRanges, NewName); if (!RenameEdit) { return llvm::make_error( llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, @@ -370,6 +385,44 @@ return Results; } +// A simple edit is eithor changing line or column, but not both. +bool impliesSimpleEdit(const Position &LHS, const Position &RHS) { + return LHS.line == RHS.line || LHS.character == RHS.character; +} + +// Performs a DFS to enumerate all possible near-miss matches. +// It finds the locations where the indexed occurrences are now spelled in +// Lexed occurrences, a near miss is defined as: +// - a near miss maps all of the **name** occurrences from the index onto a +// *subset* of lexed occurrences (we allow a single name refers to more +// than one symbol) +// - all indexed occurrences must be mapped, and Result must be distinct and +// preseve order (only support detecting simple edits to ensure a +// robust mapping) +// - each indexed -> lexed occurrences mapping correspondence may change the +// *line* or *column*, but not both (increases chance of a robust mapping) +void findNearMiss( + std::vector &PartialMatch, ArrayRef IndexedRest, + ArrayRef LexedRest, int LexedIndex, int &Fuel, + llvm::function_ref &)> MatchedCB) { + if (--Fuel < 0) + return; + if (IndexedRest.size() > LexedRest.size()) + return; + if (IndexedRest.empty()) { + MatchedCB(PartialMatch); + return; + } + if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) { + PartialMatch.push_back(LexedIndex); + findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(), + LexedIndex + 1, Fuel, MatchedCB); + PartialMatch.pop_back(); + } + findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(), + LexedIndex + 1, Fuel, MatchedCB); +} + } // namespace llvm::Expected rename(const RenameInputs &RInputs) { @@ -504,5 +557,112 @@ return Edit(InitialCode, std::move(RenameEdit)); } +// Details: +// - lex the draft code to get all rename candidates, this yields a superset +// of candidates. +// - apply range patching heuristics to generate "authoritative" occurrences, +// cases we consider: +// (a) index returns a subset of candidates, we use the indexed results. +// - fully equal, we are sure the index is up-to-date +// - proper subset, index is correct in most cases? there may be false +// positives (e.g. candidates got appended), but rename is still safe +// (b) index returns non-candidate results, we attempt to map the indexed +// ranges onto candidates in a plausible way (e.g. guess that lines +// were inserted). If such a "near miss" is found, the rename is still +// possible +llvm::Optional> +adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier, + std::vector Indexed, const LangOptions &LangOpts) { + assert(!Indexed.empty()); + std::vector Lexed = + collectIdentifierRanges(Identifier, DraftCode, LangOpts); + llvm::sort(Indexed); + llvm::sort(Lexed); + return getMappedRanges(Indexed, Lexed); +} + +llvm::Optional> getMappedRanges(ArrayRef Indexed, + ArrayRef Lexed) { + assert(!Indexed.empty()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Lexed.begin(), Lexed.end())); + + if (Indexed.size() > Lexed.size()) { + vlog("The number of lexed occurrences is less than indexed occurrences"); + return llvm::None; + } + // Fast check for the special subset case. + if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end())) + return Indexed.vec(); + + std::vector Best; + size_t BestCost = std::numeric_limits::max(); + bool HasMultiple = 0; + std::vector ResultStorage; + int Fuel = 10000; + findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel, + [&](const std::vector &Matched) { + size_t MCost = + renameRangeAdjustmentCost(Indexed, Lexed, Matched); + if (MCost < BestCost) { + BestCost = MCost; + Best = std::move(Matched); + HasMultiple = false; // reset + return; + } + if (MCost == BestCost) + HasMultiple = true; + }); + if (HasMultiple) { + vlog("The best near miss is not unique."); + return llvm::None; + } + if (Best.empty()) { + vlog("Didn't find a near miss."); + return llvm::None; + } + std::vector Mapped; + for (auto I : Best) + Mapped.push_back(Lexed[I]); + return Mapped; +} + +// The cost is the sum of the implied edit sizes between successive diffs, only +// simple edits are considered: +// - insert/remove a line (change line offset) +// - insert/remove a character on an existing line (change column offset) +// +// Example I, total result is 1 + 1 = 2. +// diff[0]: line + 1 <- insert a line before edit 0. +// diff[1]: line + 1 +// diff[2]: line + 1 +// diff[3]: line + 2 <- insert a line before edits 2 and 3. +// +// Example II, total result is 1 + 1 + 1 = 3. +// diff[0]: line + 1 <- insert a line before edit 0. +// diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a +// character on edit 1. +size_t renameRangeAdjustmentCost(ArrayRef Indexed, ArrayRef Lexed, + ArrayRef MappedIndex) { + assert(Indexed.size() == MappedIndex.size()); + assert(std::is_sorted(Indexed.begin(), Indexed.end())); + assert(std::is_sorted(Lexed.begin(), Lexed.end())); + + int LastLine = -1; + int LastDLine = 0, LastDColumn = 0; + int Cost = 0; + for (size_t I = 0; I < Indexed.size(); ++I) { + int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line; + int DColumn = + Indexed[I].start.character - Lexed[MappedIndex[I]].start.character; + int Line = Indexed[I].start.line; + if (Line != LastLine) + LastDColumn = 0; // colmun offsets don't carry cross lines. + Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn); + std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn); + } + return Cost; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -14,9 +14,11 @@ #include "index/Ref.h" #include "refactor/Rename.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -24,7 +26,9 @@ using testing::Eq; using testing::Pair; +using testing::IsEmpty; using testing::UnorderedElementsAre; +using testing::UnorderedElementsAreArray; // Build a RefSlab from all marked ranges in the annotation. The ranges are // assumed to associate with the given SymbolName. @@ -853,6 +857,300 @@ expectedResult(Code, expectedResult(T, "abc"))); } +TEST(CrossFileRenameTests, adjustRenameRanges) { + // Ranges in IndexedCode indicate the indexed occurrences; + // ranges in DraftCode indicate the expected mapped result, empty indicates + // we expect no matched result found. + struct { + llvm::StringRef IndexedCode; + llvm::StringRef DraftCode; + } Tests[] = { + { + // both line and column are changed, not a near miss. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + // insert a line. + double x = 0; + )cpp", + }, + { + // subset. + R"cpp( + int [[x]] = 0; + )cpp", + R"cpp( + int [[x]] = 0; + {int x = 0; } + )cpp", + }, + { + // shift columns. + R"cpp(int [[x]] = 0; void foo(int x);)cpp", + R"cpp(double [[x]] = 0; void foo(double x);)cpp", + }, + { + // shift lines. + R"cpp( + int [[x]] = 0; + void foo(int x); + )cpp", + R"cpp( + // insert a line. + int [[x]] = 0; + void foo(int x); + )cpp", + }, + }; + LangOptions LangOpts; + LangOpts.CPlusPlus = true; + for (const auto &T : Tests) { + Annotations Draft(T.DraftCode); + auto ActualRanges = adjustRenameRanges( + Draft.code(), "x", Annotations(T.IndexedCode).ranges(), LangOpts); + if (!ActualRanges) + EXPECT_THAT(Draft.ranges(), testing::IsEmpty()); + else + EXPECT_THAT(Draft.ranges(), + testing::UnorderedElementsAreArray(*ActualRanges)) + << T.DraftCode; + } +} + +TEST(RangePatchingHeuristic, GetMappedRanges) { + // ^ in LexedCode marks the ranges we expect to be mapped; no ^ indicates + // there are no mapped ranges. + struct { + llvm::StringRef IndexedCode; + llvm::StringRef LexedCode; + } Tests[] = { + { + // no lexed ranges. + "[[]]", + "", + }, + { + // both line and column are changed, not a near miss. + R"([[]])", + R"( + [[]] + )", + }, + { + // subset. + "[[]]", + "^[[]] [[]]" + }, + { + // shift columns. + "[[]] [[]]", + " ^[[]] ^[[]] [[]]" + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + ^[[]] + + ^[[]] ^[[]] + )", + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + ^[[]] + ^[[]] ^[[]] // column is shifted. + )", + }, + { + R"( + [[]] + + [[]] [[]] + )", + R"( + // insert a line + [[]] + + [[]] [[]] // not mapped (both line and column are changed). + )", + }, + { + R"( + [[]] + [[]] + + [[]] + [[]] + + } + )", + R"( + // insert a new line + ^[[]] + ^[[]] + [[]] // additional range + ^[[]] + ^[[]] + [[]] // additional range + )", + }, + { + // non-distinct result (two best results), not a near miss + R"( + [[]] + [[]] + [[]] + )", + R"( + [[]] + [[]] + [[]] + [[]] + )", + } + }; + for (const auto &T : Tests) { + auto Lexed = Annotations(T.LexedCode); + auto LexedRanges = Lexed.ranges(); + std::vector ExpectedMatches; + for (auto P : Lexed.points()) { + auto Match = llvm::find_if(LexedRanges, [&P](const Range& R) { + return R.start == P; + }); + ASSERT_NE(Match, LexedRanges.end()); + ExpectedMatches.push_back(*Match); + } + + auto Mapped = + getMappedRanges(Annotations(T.IndexedCode).ranges(), LexedRanges); + if (!Mapped) + EXPECT_THAT(ExpectedMatches, IsEmpty()); + else + EXPECT_THAT(ExpectedMatches, UnorderedElementsAreArray(*Mapped)) + << T.IndexedCode; + } +} + +TEST(CrossFileRenameTests, adjustmentCost) { + struct { + llvm::StringRef RangeCode; + size_t ExpectedCost; + } Tests[] = { + { + R"( + $idx[[]]$lex[[]] // diff: 0 + )", + 0, + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + $lex[[]] // line diff: +1 + + $idx[[]] + + $lex[[]] // line diff: +2 + )", + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 1 + 1 + }, + { + R"( + $idx[[]] + + + $lex[[]] // line diff: +3 + $idx[[]] + + $lex[[]] // line diff: +2 + $idx[[]] + $lex[[]] // line diff: +1 + )", + 3 + 1 + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // line diff: +1 + $lex[[]] // line diff: -2 + + $idx[[]] + $idx[[]] + + + $lex[[]] // line diff: +3 + )", + 1 + 3 + 5 + }, + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + }, + { + R"( + $idx[[]] + $lex[[]] // diff: +1 + $idx[[]] $lex[[]] // column diff: +1 + $idx[[]]$lex[[]] // diff: 0 + )", + 1 + 1 + 1 + }, + { + R"( + $idx[[]] $lex[[]] // column diff: +1 + )", + 1 + }, + { + R"( + // column diffs: +1, +2, +3 + $idx[[]] $lex[[]] $idx[[]] $lex[[]] $idx[[]] $lex[[]] + )", + 1 + 1 + 1, + }, + }; + for (const auto &T : Tests) { + Annotations C(T.RangeCode); + std::vector MappedIndex; + for (size_t I = 0; I < C.ranges("lex").size(); ++I) + MappedIndex.push_back(I); + EXPECT_EQ(renameRangeAdjustmentCost(C.ranges("idx"), C.ranges("lex"), + MappedIndex), + T.ExpectedCost) << T.RangeCode; + } +} + } // namespace } // namespace clangd } // namespace clang