diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -55,6 +55,49 @@ std::vector Occurrences, llvm::StringRef NewName); +/// Check whether the index results are good enough to perform the cross-file +/// rename. +/// +/// Index may be stale at the point of renaming, this class is designed to +/// mitigate the issue of staleness index that may hurt the quality of rename +/// results. +/// +/// Techniques: +/// - lex the latest file content (from dirty buffer/disk) to find all rename +/// candidates, this yields a superset candidates +/// - apply range patching heuristics to generate "authoritative" rename +/// candidates. Scenarios that we are confident: +/// (a) index returns a subset of candidates +/// - fully equal, we are sure the index is up-to-date +/// - proper subset, index is correct in most cases? there may be false +/// positives (e.g. candidates got appended), but rename is still safe +/// (b) index returns non-candidate results +/// - check whether they are near-miss (same line/column) +class RangeMatcher { +public: + enum class MatchType : uint8_t { + NoMatch, + + Subset, + NearMiss, + }; + RangeMatcher(const LangOptions &LangOpts) : LangOpts(LangOpts) {} + + /// Infers the \p IndexOccurrens is good enough, and returns the + /// "authoritative" candidates for renaming the \p Identifier in the \p Code. + /// Returns None if the \p IndexOccurrences is not good enough. + llvm::Optional> + getBest(llvm::StringRef Code, llvm::StringRef Identifier, + std::vector IndexOccurrences); + + /// Detects the two inputs are matched. This is an implementation detail of + /// \p getBest, exposing for testing only. + static MatchType match(llvm::ArrayRef LHS, llvm::ArrayRef RHS); + +private: + const LangOptions &LangOpts; +}; + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -16,11 +16,15 @@ #include "index/SymbolCollector.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" +#include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include +#include namespace clang { namespace clangd { @@ -328,8 +332,6 @@ // as the file content we rename on, and fallback to file content on disk if // there is no dirty buffer. // -// FIXME: add range patching heuristics to detect staleness of the index, and -// report to users. // FIXME: our index may return implicit references, which are non-eligitble // for rename, we should filter out these references. llvm::Expected renameOutsideFile( @@ -341,6 +343,7 @@ if (!AffectedFiles) return AffectedFiles.takeError(); FileEdits Results; + RangeMatcher RMatcher(RenameDecl.getASTContext().getLangOpts()); for (auto &FileAndOccurrences : *AffectedFiles) { llvm::StringRef FilePath = FileAndOccurrences.first(); @@ -349,9 +352,20 @@ elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } + + auto RenameCandidates = + RMatcher.getBest(*AffectedFileCode, RenameDecl.getNameAsString(), + std::move(FileAndOccurrences.second)); + if (!RenameCandidates) { + return llvm::make_error( + llvm::formatv("index results don't match the content of file {0}, " + "the index may be stale.", + FilePath), + llvm::inconvertibleErrorCode()); + } auto RenameEdit = buildRenameEdit(FilePath, *AffectedFileCode, - std::move(FileAndOccurrences.second), NewName); + std::move(*RenameCandidates), NewName); if (!RenameEdit) { return llvm::make_error( llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, @@ -364,6 +378,102 @@ return Results; } +// Returns true if methods of Subset are all included in Superset. +bool subset(llvm::ArrayRef Subset, llvm::ArrayRef Superset) { + assert(std::is_sorted(Subset.begin(), Subset.end())); + assert(std::is_sorted(Superset.begin(), Superset.end())); + auto ItSubset = Subset.begin(); + auto ItSuperset = Superset.begin(); + while (ItSubset != Subset.end() && ItSuperset != Superset.end()) { + if (*ItSuperset == *ItSubset) { + ++ItSubset; + ++ItSuperset; + } else if (*ItSuperset < *ItSubset) { + ++ItSuperset; + } else { + // we are sure Subset can't be a subset of Superset now, bail out. + break; + } + } + return ItSubset == Subset.end(); +} + +// A forward iterator for reading lines from a sorted array of "Range". +class LineIterator { +public: + explicit LineIterator(ArrayRef All) : All(All) { + CurrentLine = {All.begin(), 0ul}; + advance(); + } + // returns current line number. + int lineNumber() const { return LineNumber; } + bool isEnd() const { return CurrentLine.begin() == All.end(); } + + const ArrayRef *operator->() const { return &CurrentLine; } + LineIterator &operator++() { + advance(); + return *this; + } + +private: + void advance() { + auto NextStart = CurrentLine.end(); + LineNumber = NextStart->start.line; + CurrentLine = + ArrayRef(NextStart, All.end()).take_while([&](const Range &R) { + return R.start.line == LineNumber; + }); + } + int LineNumber = 0; + ArrayRef CurrentLine; + ArrayRef All; +}; + +// Check whether two lines are close enough to each other, they are considered +// near-miss if +// 1) same line number, and difference between columns is small, or +// 2) same columns, difference between lines is small +bool lineNearMiss(LineIterator LHS, LineIterator RHS) { + assert(!LHS.isEnd() && !RHS.isEnd()); + // Bail out early if the number of elements is different. + if (LHS->size() != RHS->size()) + return false; + + static constexpr int KNearMissThrehold = 2; + // Same line, check whether columns are close enough. + if (LHS.lineNumber() == RHS.lineNumber()) { + return std::equal(LHS->begin(), LHS->end(), RHS->begin(), + [&](const Range &R1, const Range &R2) { + return abs(R1.start.character - R2.start.character) <= + KNearMissThrehold; + }); + } + // Diff line, check whether the lines are close enough, and columns are + // the same. + if (abs(LHS.lineNumber() - RHS.lineNumber()) > KNearMissThrehold) + return false; + return std::equal(LHS->begin(), LHS->end(), RHS->begin(), + [](const Range &R1, const Range &R2) { + return R1.start.character == R2.start.character; + }); +} +// Check whether two inputs LHS and RHS are close enough to each other. +// The LHS and RHS are considered near-miss if only if each line of them is +// near-miss. +bool nearMiss(ArrayRef LHS, ArrayRef RHS) { + if (LHS.size() != RHS.size()) + return false; + LineIterator ItLHS(LHS); + LineIterator ItRHS(RHS); + while (!ItLHS.isEnd() && !ItRHS.isEnd()) { + if (!lineNearMiss(ItLHS, ItRHS)) + return false; + ++ItLHS; + ++ItRHS; + } + return ItLHS.isEnd() && ItRHS.isEnd(); +} + } // namespace llvm::Expected rename(const RenameInputs &RInputs) { @@ -499,5 +609,34 @@ return Edit(InitialCode, std::move(RenameEdit)); } +llvm::Optional> +RangeMatcher::getBest(llvm::StringRef Code, llvm::StringRef Identifier, + std::vector IndexOccurrences) { + std::vector Candidates = + collectIdentifierRanges(Identifier, Code, LangOpts); + + llvm::sort(Candidates); + llvm::sort(IndexOccurrences); + + auto M = match(IndexOccurrences, Candidates); + if (M == MatchType::Subset) + return std::move(IndexOccurrences); // RVO is disallowed for parameters. + if (M == MatchType::NearMiss) + return Candidates; + return None; +} + +RangeMatcher::MatchType RangeMatcher::match(llvm::ArrayRef LHS, + llvm::ArrayRef RHS) { + assert(std::is_sorted(LHS.begin(), LHS.end()) && "LHS input must be sorted"); + assert(std::is_sorted(RHS.begin(), RHS.end()) && "RHS input must be sorted"); + + if (subset(LHS, RHS)) + return MatchType::Subset; + if (nearMiss(LHS, RHS)) + return MatchType::NearMiss; + return MatchType::NoMatch; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -11,6 +11,7 @@ #include "TestTU.h" #include "index/Ref.h" #include "refactor/Rename.h" +#include "clang/Basic/LangOptions.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/MemoryBuffer.h" #include "gmock/gmock.h" @@ -695,6 +696,111 @@ expectedResult(Code, expectedResult(T, "abc"))); } +TEST(CrossFileRenameTests, RangePatching) { + struct { + llvm::StringRef LatestCode; + llvm::StringRef IndexCode; + RangeMatcher::MatchType ExpectedMatch; + } Tests [] = { + { + // Equal + R"([[foo]])", + R"([[foo]])", + RangeMatcher::MatchType::Subset, + }, + { + // proper subset + R"([[foo]] [[foo]])", + R"([[foo]])", + RangeMatcher::MatchType::Subset, + }, + { + // same line, near-miss column. + R"([[foo]] [[foo]])", + R"( [[foo]] [[foo]])", + RangeMatcher::MatchType::NearMiss, + }, + { + // same line, near-miss column. + R"([[foo]] [[foo]])", + R"([[foo]] [[foo]])", + RangeMatcher::MatchType::NearMiss, + }, + { + // same column, near-miss line + R"( + [[foo]] [[foo]] + )", + R"( + + [[foo]] [[foo]] + )", + RangeMatcher::MatchType::NearMiss, + }, + + + { + // same line, but column diff > near-miss threshold + R"([[foo]])", + R"( [[foo]])", + RangeMatcher::MatchType::NoMatch, + }, + { + // same column, but line diff > near-miss threshold + R"( + [[foo]] [[foo]] + )", + R"( + + + + [[foo]] [[foo]] + )", + RangeMatcher::MatchType::NoMatch, + }, + { + // two lines with different number of elements are not near-miss. + R"( + [[foo]] [[foo]] + )", + R"( + [[foo]] + [[foo]] + )", + RangeMatcher::MatchType::NoMatch, + } + }; + LangOptions LangOpts; + LangOpts.CPlusPlus = true; + RangeMatcher Matcher(LangOpts); + + for (const auto &T : Tests) { + auto IndexOccurrences = Annotations(T.IndexCode).ranges(); + auto Candidates = Annotations(T.LatestCode).ranges(); + EXPECT_EQ(T.ExpectedMatch, + RangeMatcher::match(IndexOccurrences, Candidates)) + << T.LatestCode << ":" << T.IndexCode; + + + auto AuthoritativeResult = Matcher.getBest(Annotations(T.LatestCode).code(), + "foo", IndexOccurrences); + switch (T.ExpectedMatch) { + case RangeMatcher::MatchType::NoMatch: + EXPECT_EQ(llvm::None, AuthoritativeResult); + break; + case RangeMatcher::MatchType::Subset: + ASSERT_TRUE(AuthoritativeResult); + EXPECT_EQ(IndexOccurrences, *AuthoritativeResult); + break; + case RangeMatcher::MatchType::NearMiss: + ASSERT_TRUE(AuthoritativeResult); + + EXPECT_EQ(Candidates, *AuthoritativeResult); + break; + } + } +} + } // namespace } // namespace clangd } // namespace clang