diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -240,6 +240,10 @@ unsigned edit_distance(StringRef Other, bool AllowReplacements = true, unsigned MaxEditDistance = 0) const; + LLVM_NODISCARD unsigned + edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, + unsigned MaxEditDistance = 0) const; + /// str - Get the contents as an std::string. LLVM_NODISCARD std::string str() const { diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h --- a/llvm/include/llvm/ADT/edit_distance.h +++ b/llvm/include/llvm/ADT/edit_distance.h @@ -28,6 +28,9 @@ /// /// \param ToArray the second sequence to compare. /// +/// \param Map A Functor to apply to each item of the sequences before +/// comparison. +/// /// \param AllowReplacements whether to allow element replacements (change one /// element into another) as a single operation, rather than as two operations /// (an insertion and a removal). @@ -39,10 +42,10 @@ /// \returns the minimum number of element insertions, removals, or (if /// \p AllowReplacements is \c true) replacements needed to transform one of /// the given sequences into the other. If zero, the sequences are identical. -template -unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, - bool AllowReplacements = true, - unsigned MaxEditDistance = 0) { +template +unsigned ComputeMappedEditDistance(ArrayRef FromArray, ArrayRef ToArray, + Functor Map, bool AllowReplacements = true, + unsigned MaxEditDistance = 0) { // The algorithm implemented below is the "classic" // dynamic-programming algorithm for computing the Levenshtein // distance, which is described here: @@ -75,15 +78,16 @@ unsigned BestThisRow = Row[0]; unsigned Previous = y - 1; + const auto &CurItem = Map(FromArray[y - 1]); for (typename ArrayRef::size_type x = 1; x <= n; ++x) { int OldRow = Row[x]; if (AllowReplacements) { - Row[x] = std::min( - Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u), - std::min(Row[x-1], Row[x])+1); + Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u), + std::min(Row[x - 1], Row[x]) + 1); } else { - if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous; + if (CurItem == Map(ToArray[x - 1])) + Row[x] = Previous; else Row[x] = std::min(Row[x-1], Row[x]) + 1; } Previous = OldRow; @@ -98,6 +102,15 @@ return Result; } +template +unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, + bool AllowReplacements = true, + unsigned MaxEditDistance = 0) { + return ComputeMappedEditDistance( + FromArray, ToArray, [](const T &X) -> const T & { return X; }, + AllowReplacements, MaxEditDistance); +} + } // End llvm namespace #endif diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -98,6 +98,13 @@ AllowReplacements, MaxEditDistance); } +unsigned llvm::StringRef::edit_distance_insensitive( + StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const { + return llvm::ComputeMappedEditDistance( + makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()), + llvm::toLower, AllowReplacements, MaxEditDistance); +} + //===----------------------------------------------------------------------===// // String Operations //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp --- a/llvm/unittests/ADT/StringRefTest.cpp +++ b/llvm/unittests/ADT/StringRefTest.cpp @@ -584,6 +584,15 @@ "people soiled our green ")); } +TEST(StringRefTest, EditDistanceInsensitive) { + StringRef Hello("HELLO"); + EXPECT_EQ(2U, Hello.edit_distance_insensitive("hill")); + EXPECT_EQ(0U, Hello.edit_distance_insensitive("hello")); + + StringRef Industry("InDuStRy"); + EXPECT_EQ(6U, Industry.edit_distance_insensitive("iNtErEsT")); +} + TEST(StringRefTest, Misc) { std::string Storage; raw_string_ostream OS(Storage);