diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -27,6 +27,26 @@ FrontendOpenMP ) +set(output_dir ${CMAKE_BINARY_DIR}/generated) +set(output_name CompletionModel) +set(model_h ${output_dir}/${output_name}.h) +set(model_cpp ${output_dir}/${output_name}.cpp) + +set(model_gen ${CMAKE_CURRENT_SOURCE_DIR}/CompletionModelCodegen.py) +set(model_json ${CMAKE_CURRENT_SOURCE_DIR}/model/tree.json) +set(features_json ${CMAKE_CURRENT_SOURCE_DIR}/model/features.json) + +add_custom_command(OUTPUT ${model_cpp} ${model_h} + COMMAND "${Python3_EXECUTABLE}" ${model_gen} --model=${model_json} --features=${features_json} --output_dir=${output_dir} --output_name=${output_name} + COMMENT "Generating code completion model runtime..." + DEPENDS ${model_gen} ${model_json} ${features_json} #[[every file the generator reads is listed so that editing features.json also regenerates the model sources]] + VERBATIM ) + +set_source_files_properties("${model_h}" + PROPERTIES GENERATED TRUE) +set_source_files_properties("${model_cpp}" + PROPERTIES GENERATED TRUE) + add_clang_library(clangDaemon AST.cpp ClangdLSPServer.cpp @@ -69,6 +89,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + ${model_cpp} index/Background.cpp index/BackgroundIndexLoader.cpp @@ -109,6 +130,10 @@ omp_gen ) +target_include_directories(clangDaemon PUBLIC + $ +) + clang_target_link_libraries(clangDaemon PRIVATE clangAST diff --git a/clang-tools-extra/clangd/CompletionModelCodegen.py b/clang-tools-extra/clangd/CompletionModelCodegen.py new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/CompletionModelCodegen.py @@ -0,0 +1,240 @@ +import argparse +import json +import struct +from dataclasses import dataclass + + +@dataclass +class Feature: + name: str + kind: str + + +def gen_header_code(features_json, is_test=False): + gaurd = "LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPLETION_MODEL_{}H".format( + "TEST_" if is_test else "") + code = """#ifndef {gaurd} +#define {gaurd}""".format(gaurd=gaurd) + code += """ +#include 
+#include +#include +#include + +namespace clang { +namespace clangd { +""" + if is_test: + code += "namespace test {\n" + code += """namespace { +template +To BitCast(From F) { + static_assert(sizeof(To) == sizeof(From), "bad bit_cast"); + To Result; + std::memcpy(&Result, &F, sizeof(From)); + return Result; +} + +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> orderEncode(a) < orderEncode(b). +inline uint32_t OrderEncode(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = BitCast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} +} // namespace + +""" + + features = [] + assert isinstance(features_json, list) + for feature in features_json: + features.append(Feature(feature['name'], feature['type'])) + + setters = [] + class_members = [] + for feature in features: + if feature.kind == "NUMERICAL": + setters.append( + "void Set{feature}(float V) {{ {feature} = OrderEncode(V); }}". + format(feature=feature.name)) + elif feature.kind == "CATEGORICAL": + setters.append( + "void Set{feature}(unsigned V) {{ {feature} = 1<= {encoded} /*{threshold}*/) goto {true_label};" + .format(label=self.label, + feature=self.feature, + encoded=order_encode(self.threshold), + threshold=self.threshold, + true_label=self.true.label)) + if self.operation == "if_member": + members = '|'.join( + ["BIT({})".format(member) for member in self.members]) + code.append( + "{label}: if(E.{feature} & ({members})) goto {true_label};". 
+ format(label=self.label, + feature=self.feature, + members=members, + true_label=self.true.label)) + return code + self.false.codegen() + self.true.codegen() + + +def gen_evaluate_func(forest_json): + assert isinstance(forest_json, list) + # Generate code for Random Forest. + code = "float Evaluate(const Example& E) {\n" + tree_num = 0 + lines = [] + lines.append("float Score = 0;") + for tree_json in forest_json: + lines += Tree(tree_json, tree_num=tree_num, node_num=0).codegen() + tree_num += 1 + lines.append("tree_{}: // No such tree.".format(tree_num)) + lines.append("return Score;") + + code += " " + "\n ".join(lines) + code += "\n}" + return code + + +def gen_cpp_code(forest_json, header, is_test): + code = "" + # Headers + code = '#include "{header}"\n'.format(header=header) + code += """#include "clang/Sema/CodeCompleteConsumer.h" +#define BIT(X) (1< ) +target_include_directories(ClangdTests PUBLIC + $ +) + clang_target_link_libraries(ClangdTests PRIVATE clangAST diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -10,6 +10,8 @@ #include "ClangdServer.h" #include "CodeComplete.h" #include "Compiler.h" +#include "CompletionModel.h" +#include "CompletionModelTest.h" #include "Matchers.h" #include "Protocol.h" #include "Quality.h" @@ -47,6 +49,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::UnorderedElementsAre; +using ContextKind = CodeCompletionContext::Kind; // GMock helpers for matching completion items. 
MATCHER_P(Named, Name, "") { return arg.Name == Name; } @@ -161,6 +164,36 @@ return S; } +TEST(DecisionForestRuntime, Evaluate) { + using Example = clangd::test::Example; + using clangd::test::Evaluate; + + Example E; + E.SetNumReferences(200); // True + E.SetFileProximityDistance(0); // True: +10.0 + E.SetContextKind(ContextKind::CCC_ArrowMemberAccess); // True: +5.0 + EXPECT_EQ(Evaluate(E), 15.0); + + E.SetNumReferences(200); // True + E.SetFileProximityDistance(-2); // False: -20.0 + E.SetContextKind(ContextKind::CCC_Namespace); // True: +5.0 + EXPECT_EQ(Evaluate(E), -15.0); + + E.SetNumReferences(100); // False + E.SetContextKind(ContextKind::CCC_DotMemberAccess); // True: +3.0, False: -6.0 + EXPECT_EQ(Evaluate(E), -3.0); +} + +TEST(DecisionForestRuntime, SanityTest) { + using Example = clangd::Example; + using clangd::Evaluate; + Example E1; + E1.SetContextKind(ContextKind::CCC_ArrowMemberAccess); + Example E2; + E2.SetContextKind(ContextKind::CCC_SymbolOrNewName); + EXPECT_GT(Evaluate(E1), Evaluate(E2)); +} + TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; Opts.Limit = 2; diff --git a/clang-tools-extra/clangd/unittests/model/features.json b/clang-tools-extra/clangd/unittests/model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/model/features.json @@ -0,0 +1,14 @@ +[ + { + "name": "NumReferences", + "type": "NUMERICAL" + }, + { + "name": "FileProximityDistance", + "type": "NUMERICAL" + }, + { + "name": "ContextKind", + "type": "CATEGORICAL" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/model/tree.json b/clang-tools-extra/clangd/unittests/model/tree.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/model/tree.json @@ -0,0 +1,52 @@ +[ + { + "operation": "if_greater", + "feature": "NumReferences", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "FileProximityDistance", + "threshold": -1, + "then": { + 
"operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "Kind::CCC_DotMemberAccess", + "Kind::CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "Kind::CCC_Namespace", + "Kind::CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] \ No newline at end of file