Add a CLANGD_DECISION_FOREST build option (default ON) so clangd can be built
without the generated decision-forest completion-ranking model. When the option
is OFF, the default ranking model becomes Heuristics, evaluateDecisionForest()
is replaced by a stub that aborts, and requesting the DecisionForest ranking
model at startup is rejected.

Note: this patch text was reconstructed from a copy whose line breaks and
leading whitespace had been collapsed. Indentation follows the usual
LLVM/CMake formatting and may not be byte-exact for context lines; if a hunk
fails to match, apply with `git apply --ignore-whitespace` (or `patch -l`).
The two standard headers in the new DecisionForest.cpp were restored from the
functions each branch calls (std::abort -> <cstdlib>, pow -> <cmath>).

diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -18,6 +18,8 @@
   unset(CLANGD_BUILD_XPC_DEFAULT)
 endif ()
 
+# This involves generating and compiling large source files, which can run into toolchain limitations.
+option(CLANGD_DECISION_FOREST "Enable decision forest model for ranking code completion items" ON)
 option(CLANGD_MALLOC_TRIM "Call malloc_trim(3) periodically in Clangd. (only takes effect when using glibc)" ON)
 # -DCLANG_TIDY_CHECKS=Off avoids a dependency on clang-tidy, reducing rebuilds.
 option(CLANGD_TIDY_CHECKS "Link all clang-tidy checks into clangd" ON)
@@ -29,6 +31,7 @@
   CLANGD_MALLOC_TRIM
   CLANGD_TIDY_CHECKS
   LLVM_ENABLE_ZLIB
+  CLANGD_DECISION_FOREST
 )
 
 configure_file(
@@ -43,8 +46,12 @@
   Option
   )
 
-include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake)
-gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example)
+set(COMPLETIONMODEL_SOURCES)
+if(CLANGD_DECISION_FOREST)
+  include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake)
+  gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example)
+  list(APPEND COMPLETIONMODEL_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp)
+endif()
 
 if(MSVC AND NOT CLANG_CL)
   set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant
@@ -66,6 +73,7 @@
   ConfigCompile.cpp
   ConfigProvider.cpp
   ConfigYAML.cpp
+  DecisionForest.cpp
   Diagnostics.cpp
   DraftStore.cpp
   DumpAST.cpp
@@ -102,7 +110,7 @@
   TUScheduler.cpp
   URI.cpp
   XRefs.cpp
-  ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp
+  ${COMPLETIONMODEL_SOURCES}
 
   index/Background.cpp
   index/BackgroundIndexLoader.cpp
diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h
--- a/clang-tools-extra/clangd/CodeComplete.h
+++ b/clang-tools-extra/clangd/CodeComplete.h
@@ -130,7 +130,9 @@
   enum CodeCompletionRankingModel {
     Heuristics,
     DecisionForest,
-  } RankingModel = DecisionForest;
+  };
+  static const CodeCompletionRankingModel DefaultRankingModel;
+  CodeCompletionRankingModel RankingModel = DefaultRankingModel;
 
   /// Callback used to score a CompletionCandidate if DecisionForest ranking
   /// model is enabled.
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -22,6 +22,7 @@
 #include "CodeCompletionStrings.h"
 #include "Compiler.h"
 #include "ExpectedTypes.h"
+#include "Feature.h"
 #include "FileDistance.h"
 #include "FuzzyMatch.h"
 #include "Headers.h"
@@ -76,6 +77,16 @@
 
 namespace clang {
 namespace clangd {
+
+#if CLANGD_DECISION_FOREST
+const CodeCompleteOptions::CodeCompletionRankingModel
+    CodeCompleteOptions::DefaultRankingModel =
+        CodeCompleteOptions::DecisionForest;
+#else
+const CodeCompleteOptions::CodeCompletionRankingModel
+    CodeCompleteOptions::DefaultRankingModel = CodeCompleteOptions::Heuristics;
+#endif
+
 namespace {
 
 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
diff --git a/clang-tools-extra/clangd/DecisionForest.cpp b/clang-tools-extra/clangd/DecisionForest.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/DecisionForest.cpp
@@ -0,0 +1,98 @@
+//===--- DecisionForest.cpp --------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Features.inc"
+
+#if !CLANGD_DECISION_FOREST
+#include "Quality.h"
+#include <cstdlib>
+
+namespace clang {
+namespace clangd {
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base) {
+  llvm::errs() << "Clangd was compiled without decision forest support.\n";
+  std::abort();
+}
+
+} // namespace clangd
+} // namespace clang
+
+#else // !CLANGD_DECISION_FOREST
+
+#include "CompletionModel.h"
+#include "Quality.h"
+#include <cmath>
+
+namespace clang {
+namespace clangd {
+
+DecisionForestScores
+evaluateDecisionForest(const SymbolQualitySignals &Quality,
+                       const SymbolRelevanceSignals &Relevance, float Base) {
+  Example E;
+  E.setIsDeprecated(Quality.Deprecated);
+  E.setIsReservedName(Quality.ReservedName);
+  E.setIsImplementationDetail(Quality.ImplementationDetail);
+  E.setNumReferences(Quality.References);
+  E.setSymbolCategory(Quality.Category);
+
+  SymbolRelevanceSignals::DerivedSignals Derived =
+      Relevance.calculateDerivedSignals();
+  int NumMatch = 0;
+  if (Relevance.ContextWords) {
+    for (const auto &Word : Relevance.ContextWords->keys()) {
+      if (Relevance.Name.contains_insensitive(Word)) {
+        ++NumMatch;
+      }
+    }
+  }
+  E.setIsNameInContext(NumMatch > 0);
+  E.setNumNameInContext(NumMatch);
+  E.setFractionNameInContext(
+      Relevance.ContextWords && !Relevance.ContextWords->empty()
+          ? NumMatch * 1.0 / Relevance.ContextWords->size()
+          : 0);
+  E.setIsInBaseClass(Relevance.InBaseClass);
+  E.setFileProximityDistanceCost(Derived.FileProximityDistance);
+  E.setSemaFileProximityScore(Relevance.SemaFileProximityScore);
+  E.setSymbolScopeDistanceCost(Derived.ScopeProximityDistance);
+  E.setSemaSaysInScope(Relevance.SemaSaysInScope);
+  E.setScope(Relevance.Scope);
+  E.setContextKind(Relevance.Context);
+  E.setIsInstanceMember(Relevance.IsInstanceMember);
+  E.setHadContextType(Relevance.HadContextType);
+  E.setHadSymbolType(Relevance.HadSymbolType);
+  E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
+
+  DecisionForestScores Scores;
+  // Exponentiating DecisionForest prediction makes the score of each tree a
+  // multiplicative boost (like NameMatch). This allows us to weigh the
+  // prediction score and NameMatch appropriately.
+  Scores.ExcludingName = pow(Base, Evaluate(E));
+  // Following cases are not part of the generated training dataset:
+  // - Symbols with `NeedsFixIts`.
+  // - Forbidden symbols.
+  // - Keywords: Dataset contains only macros and decls.
+  if (Relevance.NeedsFixIts)
+    Scores.ExcludingName *= 0.5;
+  if (Relevance.Forbidden)
+    Scores.ExcludingName *= 0;
+  if (Quality.Category == SymbolQualitySignals::Keyword)
+    Scores.ExcludingName *= 4;
+
+  // NameMatch should be a multiplier on total score to support rescoring.
+  Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
+  return Scores;
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif // !CLANGD_DECISION_FOREST
\ No newline at end of file
diff --git a/clang-tools-extra/clangd/Feature.cpp b/clang-tools-extra/clangd/Feature.cpp
--- a/clang-tools-extra/clangd/Feature.cpp
+++ b/clang-tools-extra/clangd/Feature.cpp
@@ -66,6 +66,10 @@
 #if !CLANGD_TIDY_CHECKS
       "-tidy"
 #endif
+
+#if !CLANGD_DECISION_FOREST
+      "-decision_forest"
+#endif
       ;
 }
 
diff --git a/clang-tools-extra/clangd/Features.inc.in b/clang-tools-extra/clangd/Features.inc.in
--- a/clang-tools-extra/clangd/Features.inc.in
+++ b/clang-tools-extra/clangd/Features.inc.in
@@ -4,3 +4,4 @@
 #define ENABLE_GRPC_REFLECTION @ENABLE_GRPC_REFLECTION@
 #define CLANGD_MALLOC_TRIM @CLANGD_MALLOC_TRIM@
 #define CLANGD_TIDY_CHECKS @CLANGD_TIDY_CHECKS@
+#define CLANGD_DECISION_FOREST @CLANGD_DECISION_FOREST@
diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp
--- a/clang-tools-extra/clangd/Quality.cpp
+++ b/clang-tools-extra/clangd/Quality.cpp
@@ -9,7 +9,6 @@
 #include "Quality.h"
 #include "AST.h"
 #include "ASTSignals.h"
-#include "CompletionModel.h"
 #include "FileDistance.h"
 #include "SourceCode.h"
 #include "index/Symbol.h"
@@ -529,65 +528,6 @@
   return SymbolQuality * SymbolRelevance;
 }
 
-DecisionForestScores
-evaluateDecisionForest(const SymbolQualitySignals &Quality,
-                       const SymbolRelevanceSignals &Relevance, float Base) {
-  Example E;
-  E.setIsDeprecated(Quality.Deprecated);
-  E.setIsReservedName(Quality.ReservedName);
-  E.setIsImplementationDetail(Quality.ImplementationDetail);
-  E.setNumReferences(Quality.References);
-  E.setSymbolCategory(Quality.Category);
-
-  SymbolRelevanceSignals::DerivedSignals Derived =
-      Relevance.calculateDerivedSignals();
-  int NumMatch = 0;
-  if (Relevance.ContextWords) {
-    for (const auto &Word : Relevance.ContextWords->keys()) {
-      if (Relevance.Name.contains_insensitive(Word)) {
-        ++NumMatch;
-      }
-    }
-  }
-  E.setIsNameInContext(NumMatch > 0);
-  E.setNumNameInContext(NumMatch);
-  E.setFractionNameInContext(
-      Relevance.ContextWords && !Relevance.ContextWords->empty()
-          ? NumMatch * 1.0 / Relevance.ContextWords->size()
-          : 0);
-  E.setIsInBaseClass(Relevance.InBaseClass);
-  E.setFileProximityDistanceCost(Derived.FileProximityDistance);
-  E.setSemaFileProximityScore(Relevance.SemaFileProximityScore);
-  E.setSymbolScopeDistanceCost(Derived.ScopeProximityDistance);
-  E.setSemaSaysInScope(Relevance.SemaSaysInScope);
-  E.setScope(Relevance.Scope);
-  E.setContextKind(Relevance.Context);
-  E.setIsInstanceMember(Relevance.IsInstanceMember);
-  E.setHadContextType(Relevance.HadContextType);
-  E.setHadSymbolType(Relevance.HadSymbolType);
-  E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
-
-  DecisionForestScores Scores;
-  // Exponentiating DecisionForest prediction makes the score of each tree a
-  // multiplciative boost (like NameMatch). This allows us to weigh the
-  // prediction score and NameMatch appropriately.
-  Scores.ExcludingName = pow(Base, Evaluate(E));
-  // Following cases are not part of the generated training dataset:
-  // - Symbols with `NeedsFixIts`.
-  // - Forbidden symbols.
-  // - Keywords: Dataset contains only macros and decls.
-  if (Relevance.NeedsFixIts)
-    Scores.ExcludingName *= 0.5;
-  if (Relevance.Forbidden)
-    Scores.ExcludingName *= 0;
-  if (Quality.Category == SymbolQualitySignals::Keyword)
-    Scores.ExcludingName *= 4;
-
-  // NameMatch should be a multiplier on total score to support rescoring.
-  Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
-  return Scores;
-}
-
 // Produces an integer that sorts in the same order as F.
 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
 static uint32_t encodeFloat(float F) {
diff --git a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt
--- a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt
+++ b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt
@@ -1,4 +1,6 @@
-add_subdirectory(CompletionModel)
+if(CLANGD_DECISION_FOREST)
+  add_subdirectory(CompletionModel)
+endif()
 
 add_benchmark(IndexBenchmark IndexBenchmark.cpp)
 
diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -797,6 +797,13 @@
     }
   }
 
+#if !CLANGD_DECISION_FOREST
+  if (RankingModel == clangd::CodeCompleteOptions::DecisionForest) {
+    llvm::errs() << "Clangd was compiled without decision forest support.\n";
+    return 1;
+  }
+#endif
+
   // Setup tracing facilities if CLANGD_TRACE is set. In practice enabling a
   // trace flag in your editor's config is annoying, launching with
   // `CLANGD_TRACE=trace.json vim` is easier.
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -11,6 +11,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "Feature.h"
 #include "Matchers.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -174,6 +175,7 @@
   return S;
 }
 
+#if CLANGD_DECISION_FOREST
 TEST(DecisionForestRankingModel, NameMatchSanityTest) {
   clangd::CodeCompleteOptions Opts;
   Opts.RankingModel = CodeCompleteOptions::DecisionForest;
@@ -207,6 +209,7 @@
           .Completions,
       ElementsAre(named("clangA"), named("clangD")));
 }
+#endif // CLANGD_DECISION_FOREST
 
 TEST(DecisionForestRankingModel, DecisionForestScorerCallbackTest) {
   clangd::CodeCompleteOptions Opts;