diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1020,6 +1020,9 @@ def ThreadSafetyVerbose : DiagGroup<"thread-safety-verbose">; def ThreadSafetyBeta : DiagGroup<"thread-safety-beta">; +// Typoed directive warnings +def TypoedDirective : DiagGroup<"typoed-directive">; + // Uniqueness Analysis warnings def Consumed : DiagGroup<"consumed">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -360,6 +360,10 @@ "%select{left|right}0 side of operator converted from negative value to " "unsigned: %1">; +def warn_pp_typo_directive : Warning< + "'#%0' directive not found, did you mean '#%1'?">, + InGroup; + def ext_pp_import_directive : Extension<"#import is a language extension">, InGroup>; def err_pp_import_directive_ms : Error< diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2241,6 +2241,11 @@ /// Return true if an error occurs parsing the arg list. bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); + /// Provide a suggestion for a typoed directive. If there is no typo, then + /// just skip suggesting. + void SuggestTypoedDirective(const Token &Tok, StringRef Directive, + const SourceLocation &endLoc); + /// We just read a \#if or related directive and decided that the /// subsequent tokens are in the \#if'd out portion of the /// file. Lex the rest of the file, until we see an \#endif. If \p diff --git a/clang/include/clang/Tooling/LevDistance.h b/clang/include/clang/Tooling/LevDistance.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/LevDistance.h @@ -0,0 +1,98 @@ +//===--- LevDistance.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Levenshtein distances. +// +// The [Levenshtein distance] is a metric for measuring the difference between +// two strings. +// +// [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_LEVDISTANCE_H +#define LLVM_CLANG_TOOLING_LEVDISTANCE_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include +#include +#include +#include + +namespace clang { +namespace tooling { +namespace levdistance { + +// Calculate the difference between `a` and `b`. +size_t levDistance(StringRef a, StringRef b); + +namespace internal { + +// Find the exact case-insensitive string for `word` in `rng` +template +Optional findExactStr(const SinglePassRange &rng, StringRef word) { + auto first = std::cbegin(rng); + auto last = std::cend(rng); + + const auto exactStr = std::find_if(first, last, [word](const auto &r) { + return std::equal(r.begin(), r.end(), word.begin(), word.end(), + [](char a, char b) { return tolower(a) == tolower(b); }); + }); + if (exactStr == last) { + return None; + } + return *exactStr; +} + +} // namespace internal + +// Find a similar string for `word` in `rng`. +template +Optional findSimilarStr(const SinglePassRange &rng, StringRef word, + size_t dist = 0) { + // We need to check if `rng` has the exact case-insensitive string because the + // Levenshtein distance match does not care about it. + if (const auto exactStr = internal::findExactStr(rng, word)) { + return *exactStr; + } + + // Keep going with the Levenshtein distance match. + // If dist is given, use the dist for maxDist; otherwise, if word size is + // less than 3, use the word size minus 1 and if not, use the word size + // divided by 3. + const size_t maxDist = dist != 0 ? dist + : word.size() < 3 ? word.size() - 1 + : word.size() / 3; + + std::vector> candidates; + for (const auto &r : rng) { + const size_t curDist = levDistance(word, r); + if (curDist <= maxDist) { + candidates.emplace_back(r, curDist); + } + } + + if (candidates.empty()) { + return None; + } else if (candidates.size() == 1) { + return candidates[0].first; + } else { + const auto similarStr = std::max_element( + candidates.cbegin(), candidates.cend(), + [](const auto &a, const auto &b) { return a.second < b.second; }); + return (*similarStr).first; + } +} + +} // namespace levdistance +} // namespace tooling +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_LEVDISTANCE_H diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt --- a/clang/lib/Lex/CMakeLists.txt +++ b/clang/lib/Lex/CMakeLists.txt @@ -29,4 +29,5 @@ LINK_LIBS clangBasic + clangTooling ) diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -32,17 +32,19 @@ #include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" #include "clang/Lex/VariadicMacroSupport.h" +#include "clang/Tooling/LevDistance.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include +#include #include #include #include @@ -433,6 +435,24 @@ return BytesToSkip - LengthDiff; } +/// SuggestTypoedDirective - Provide a suggestion for a typoed directive. If +/// there is no typo, then just skip suggesting. +void Preprocessor::SuggestTypoedDirective(const Token &Tok, StringRef Directive, + const SourceLocation &endLoc) { + const std::array candidates{ + "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"}; + + if (const auto sugg = + tooling::levdistance::findSimilarStr(candidates, Directive)) { + CharSourceRange DirectiveRange = + CharSourceRange::getCharRange(Tok.getLocation(), endLoc); + const std::string suggValue = sugg.getValue(); + + auto Hint = FixItHint::CreateReplacement(DirectiveRange, "#" + suggValue); + Diag(Tok, diag::warn_pp_typo_directive) << Directive << suggValue << Hint; + } +} + /// SkipExcludedConditionalBlock - We just read a \#if or related directive and /// decided that the subsequent tokens are in the \#if'd out portion of the /// file. Lex the rest of the file, until we see an \#endif. If @@ -556,6 +576,8 @@ CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, /*foundnonskip*/false, /*foundelse*/false); + } else { + SuggestTypoedDirective(Tok, Directive, endLoc); } } else if (Directive[0] == 'e') { StringRef Sub = Directive.substr(1); @@ -705,7 +727,11 @@ break; } } + } else { + SuggestTypoedDirective(Tok, Directive, endLoc); } + } else { + SuggestTypoedDirective(Tok, Directive, endLoc); } CurPPLexer->ParsingPreprocessorDirective = false; diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -110,6 +110,7 @@ GuessTargetAndModeCompilationDatabase.cpp InterpolatingCompilationDatabase.cpp JSONCompilationDatabase.cpp + LevDistance.cpp Refactoring.cpp RefactoringCallbacks.cpp StandaloneExecution.cpp diff --git a/clang/lib/Tooling/LevDistance.cpp b/clang/lib/Tooling/LevDistance.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/LevDistance.cpp @@ -0,0 +1,59 @@ +//===--- LevDistance.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains implementations of Levenshtein distances. +// +// The [Levenshtein distance] is a metric for measuring the difference between +// two strings. +// +// [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/LevDistance.h" + +namespace clang { +namespace tooling { +namespace levdistance { + +size_t levDistance(StringRef a, StringRef b) { + const size_t aSize = a.size(); + const size_t bSize = b.size(); + + // for all i and j, d[i,j] will hold the Levenshtein distance between + // the first i characters of `a` and the first j characters of `b` + std::vector> d(aSize + 1, std::vector(bSize + 1)); + d[0][0] = 0; + + // source prefixes can be transformed into empty string by + // dropping all characters + for (size_t i = 1; i <= aSize; ++i) + d[i][0] = i; + + // target prefixes can be reached from empty source prefix + // by inserting every character + for (size_t j = 1; j <= bSize; ++j) + d[0][j] = j; + + for (size_t i = 1; i <= aSize; ++i) { + for (size_t j = 1; j <= bSize; ++j) { + const size_t substCost = a[i - 1] == b[j - 1] ? 0 : 1; + d[i][j] = std::min({ + d[i - 1][j] + 1, // deletion + d[i][j - 1] + 1, // insertion + d[i - 1][j - 1] + substCost // substitution + }); + } + } + + return d[aSize][bSize]; +} + +} // namespace levdistance +} // namespace tooling +} // namespace clang diff --git a/clang/test/OpenMP/predefined_macro.c b/clang/test/OpenMP/predefined_macro.c --- a/clang/test/OpenMP/predefined_macro.c +++ b/clang/test/OpenMP/predefined_macro.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -fopenmp-simd -verify -o - %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -verify -o - %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -verify -o - %s -// expected-no-diagnostics +// expected-warning@+5 {{'#elsif' directive not found, did you mean '#elif'?}} #ifdef FOPENMP // -fopenmp option is specified #ifndef _OPENMP diff --git a/clang/test/Preprocessor/suggest-typoed-directive.c b/clang/test/Preprocessor/suggest-typoed-directive.c new file mode 100644 --- /dev/null +++ b/clang/test/Preprocessor/suggest-typoed-directive.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -std=c99 -fsyntax-only -verify -pedantic %s + +// expected-warning@+11 {{'#id' directive not found, did you mean '#if'?}} +// expected-warning@+11 {{'#ifd' directive not found, did you mean '#if'?}} +// expected-warning@+11 {{'#ifde' directive not found, did you mean '#ifdef'?}} +// expected-warning@+11 {{'#elid' directive not found, did you mean '#elif'?}} +// expected-warning@+11 {{'#elsif' directive not found, did you mean '#elif'?}} +// expected-warning@+11 {{'#elseif' directive not found, did you mean '#elif'?}} +// expected-warning@+11 {{'#elfidef' directive not found, did you mean '#elifdef'?}} +// expected-warning@+11 {{'#elfindef' directive not found, did you mean '#elifdef'?}} +// expected-warning@+11 {{'#elsi' directive not found, did you mean '#else'?}} +// expected-warning@+11 {{'#endi' directive not found, did you mean '#endif'?}} +#ifdef UNDEFINED +#id +#ifd +#ifde +#elid +#elsif +#elseif +#elfidef +#elfindef +#elsi +#endi +#endif + +#if special_compiler +#special_compiler_directive // no diagnostics +#endif + +// expected-warning {{ISO C requires a translation unit to contain at least one declaration}} diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt --- a/clang/unittests/Tooling/CMakeLists.txt +++ b/clang/unittests/Tooling/CMakeLists.txt @@ -18,6 +18,7 @@ FixItTest.cpp HeaderIncludesTest.cpp StandardLibraryTest.cpp + LevDistanceTest.cpp LexicallyOrderedRecursiveASTVisitorTest.cpp LookupTest.cpp QualTypeNamesTest.cpp diff --git a/clang/unittests/Tooling/LevDistanceTest.cpp b/clang/unittests/Tooling/LevDistanceTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Tooling/LevDistanceTest.cpp @@ -0,0 +1,60 @@ +//===- unittest/Tooling/LevDistanceTest.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/LevDistance.h" +#include "gtest/gtest.h" +#include + +using namespace clang; + +using tooling::levdistance::findSimilarStr; +using tooling::levdistance::levDistance; + +namespace { + +TEST(LevDistanceTest, levDistance) { + EXPECT_EQ(0ul, levDistance("aaaa", "aaaa")); + EXPECT_EQ(1ul, levDistance("aaaa", "aaab")); + EXPECT_EQ(2ul, levDistance("aabc", "aacb")); + EXPECT_EQ(2ul, levDistance("aabc", "abca")); + EXPECT_EQ(3ul, levDistance("aabc", "adef")); + EXPECT_EQ(4ul, levDistance("abcd", "efgh")); +} + +TEST(LevDistanceTest, findSimilarStr) { + std::array candidates{"aaab", "aaabc"}; + EXPECT_EQ(std::string("aaab"), findSimilarStr(candidates, "aaaa")); + + candidates = {"aab", "abc"}; + EXPECT_EQ(std::string("aab"), findSimilarStr(candidates, "aaa")); + + candidates = {"ab", "bc"}; + EXPECT_EQ(std::string("ab"), findSimilarStr(candidates, "aa")); + + candidates = {"b", "c"}; + EXPECT_EQ(None, findSimilarStr(candidates, "a")); +} + +TEST(LevDistanceTest, findSimilarStrToMacros) { + const std::array candidates{ + "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"}; + + EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elfidef")); + EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elifdef")); + EXPECT_EQ(None, findSimilarStr(candidates, "special_compiler_directive")); +} + +TEST(LevDistanceTest, findSimilarStrInCaseInsensitive) { + const std::array candidates{ + "if", "ifdef", "ifndef", "elif", "elifdef", "elifndef", "else", "endif"}; + + EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "elifdef")); + EXPECT_EQ(std::string("elifdef"), findSimilarStr(candidates, "ELIFDEF")); +} + +} // end anonymous namespace