diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -26,6 +26,10 @@ AllTargetsInfos FrontendOpenMP ) + +set(DF_COMPILER ${CMAKE_CURRENT_SOURCE_DIR}/CompletionModelCodegen.py) +include(${CMAKE_CURRENT_SOURCE_DIR}/CompletionModel.cmake) +df_compile(${CMAKE_CURRENT_SOURCE_DIR}/model CompletionModel clang::clangd::Example) add_clang_library(clangDaemon AST.cpp @@ -69,6 +73,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + ${GENERATED_CC} index/Background.cpp index/BackgroundIndexLoader.cpp @@ -109,6 +114,10 @@ omp_gen ) +target_include_directories(clangDaemon PUBLIC + $ +) + clang_target_link_libraries(clangDaemon PRIVATE clangAST diff --git a/clang-tools-extra/clangd/CompletionModel.cmake b/clang-tools-extra/clangd/CompletionModel.cmake new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/CompletionModel.cmake @@ -0,0 +1,36 @@ +# Run the completion model code generator on the model in the ${model} +# directory. +# Produces a pair of files called ${fname}.h and ${fname}.cpp in +# ${CMAKE_BINARY_DIR}/generated/decision_forest. The generated header will +# define a C++ class called ${cpp_class} - which may be a namespace-qualified +# class name. +# Sets GENERATED_CC (generated source) and DF_INCLUDE (output directory) in the +# caller's scope. +function(df_compile model fname cpp_class) + set(model_json ${model}/forest.json) + set(model_features ${model}/features.json) + + set(output_dir ${CMAKE_BINARY_DIR}/generated/decision_forest) + set(df_header ${output_dir}/${fname}.h) + set(df_cpp ${output_dir}/${fname}.cpp) + # The code generator writes into ${output_dir} without creating it. + file(MAKE_DIRECTORY ${output_dir}) + + add_custom_command(OUTPUT ${df_header} ${df_cpp} + COMMAND "${Python3_EXECUTABLE}" ${DF_COMPILER} + --model ${model} + --output_dir ${output_dir} + --fname ${fname} + --cpp_class ${cpp_class} + COMMENT "Generating code completion model runtime..." 
+ DEPENDS ${DF_COMPILER} ${model_json} ${model_features} + VERBATIM ) + + set_source_files_properties(${df_header} PROPERTIES + GENERATED 1) + set_source_files_properties(${df_cpp} PROPERTIES + GENERATED 1 + COMPILE_FLAGS -Wno-unused-label) + set(GENERATED_CC ${df_cpp} PARENT_SCOPE) + set(DF_INCLUDE ${output_dir} PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/clang-tools-extra/clangd/CompletionModelCodegen.py b/clang-tools-extra/clangd/CompletionModelCodegen.py new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/CompletionModelCodegen.py @@ -0,0 +1,295 @@ +import argparse +import json +import struct +from enum import Enum +from dataclasses import dataclass +from functools import reduce + + +class Feature: + class Type(Enum): + NUMERICAL = 1 + CATEGORICAL = 2 + + def __init__(self, feature_json): + self.name = feature_json['name'] + assert feature_json['type'] in [t.name for t in Feature.Type + ], "Unknown feature type." + self.type = Feature.Type[feature_json['type']] + + if self.type == Feature.Type.CATEGORICAL: + assert 'header' in feature_json, "Header not found in categorical feature." + assert 'enum' in feature_json, "Enum not found in categorical feature." 
+ self.header = feature_json['header'] + self.enum = feature_json['enum'] + + def setter(self): + if self.type == Feature.Type.NUMERICAL: + return "void Set{feature}(float V) {{ {feature} = OrderEncode(V); }}".format( + feature=self.name) + if self.type == Feature.Type.CATEGORICAL: + return "void Set{feature}(unsigned V) {{ {feature} = 1<= {encoded} /*{threshold}*/) goto {true_label};" + .format(label=self.label, + feature=self.feature, + encoded=order_encode(self.threshold), + threshold=self.threshold, + true_label=self.true.label)) + if self.operation == "if_member": + members = '|'.join([ + "BIT({enum}::{member})".format( + enum=features[self.feature].enum, member=member) + for member in self.members + ]) + code.append( + "{label}: if(E.{feature} & ({members})) goto {true_label};". + format(label=self.label, + feature=self.feature, + members=members, + true_label=self.true.label)) + return code + self.false.codegen(features) + self.true.codegen( + features) + + +def ReadDecisionForest(forest_json: list): + forest = [] + tree_num = 0 + for tree_json in forest_json: + forest.append(Tree(tree_json, tree_num=tree_num, node_num=0)) + tree_num += 1 + return forest + + +def gen_header_code(features, cpp_class): + # Header gaurd + code = """#ifndef {gaurd} +#define {gaurd} + #include + +""".format(gaurd=cpp_class.header_gaurd()) + + # Namespace begin + code += "\n".join(cpp_class.ns_begin()) + "\n" + + # Float order encoding. + code += """ +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> orderEncode(a) < orderEncode(b). +uint32_t OrderEncode(float F); + +""" + setters = [f.setter() for f in features.values()] + class_members = [f.member() for f in features.values()] + + # Class. 
+ code += "class {class_name} {{\n".format(class_name=cpp_class.name) + code += "public:\n" + code += " " + "\n ".join(setters) + "\n" + code += "\n" + code += "private:\n" + code += " " + "\n ".join(class_members) + "\n" + + code += " friend float Evaluate(const Example&);\n" + code += "};\n" + code += "float Evaluate(const Example&);" + "\n" + + # Namespace end. + code += "\n".join(cpp_class.ns_end()) + "\n" + code += "#endif // {gaurd}".format(gaurd=cpp_class.header_gaurd()) + return code + + +def order_encode(v: float): + i = struct.unpack(''.format(h=h) + for h in angled_include) + "\n\n" + code += "\n".join('#include "{h}"'.format(h=h) + for h in quoted_include) + "\n\n" + code += "#define BIT(X) (1<::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +""" + code += gen_evaluate_func(forest, features) + "\n" + # Namespaces End + code += "\n".join(cpp_class.ns_end()) + "\n" + return code + + +def main(): + parser = argparse.ArgumentParser('DecisionForestCodegen') + parser.add_argument('--fname', help='output file name.') + parser.add_argument('--output_dir', help='output directory') + parser.add_argument('--model', help='path to model directory') + parser.add_argument( + '--cpp_class', + help= + 'The name of the class (which may be a namespace-qualified) created in generated header.' 
+ ) + ns = parser.parse_args() + + output_dir = ns.output_dir + fname = ns.fname + header_file = "{dir}/{name}.h".format(dir=output_dir, name=fname) + cpp_file = "{dir}/{name}.cpp".format(dir=output_dir, name=fname) + cpp_class = CppClass(fname=fname, cpp_class=ns.cpp_class) + + model_json = "{dir}/forest.json".format(dir=ns.model) + features_json = "{dir}/features.json".format(dir=ns.model) + + with open(features_json) as features_file: + features = Feature.ReadFeatures(json.load(features_file)) + + with open(model_json) as model_file: + forest = Tree.ReadDecisionForest(json.load(model_file)) + + with open(cpp_file, 'w+t') as output_cc: + output_cc.write( + gen_cpp_code(forest=forest, + features=features, + fname=fname, + cpp_class=cpp_class)) + + with open(header_file, 'w+t') as output_h: + output_h.write(gen_header_code(features=features, cpp_class=cpp_class)) + + +if __name__ == '__main__': + main() diff --git a/clang-tools-extra/clangd/for-review-only/CompletionModel.h b/clang-tools-extra/clangd/for-review-only/CompletionModel.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/CompletionModel.h @@ -0,0 +1,23 @@ +#ifndef GENERATED_CODE_COMPLETION_MODEL_COMPLETION_MODEL_H +#define GENERATED_CODE_COMPLETION_MODEL_COMPLETION_MODEL_H + #include + +namespace clang { +namespace clangd { + +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> orderEncode(a) < orderEncode(b). +uint32_t OrderEncode(float F); + +class Example { +public: + void SetContextKind(unsigned V) { ContextKind = 1< +#include + +#include "CompletionModel.h" +#include "llvm/ADT/bit.h" +#include "clang/Sema/CodeCompleteConsumer.h" + +#define BIT(X) (1<::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. 
+ if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + tree_0: + t0_n0: if(E.ContextKind & (BIT(clang::CodeCompletionContext::Kind::CCC_DotMemberAccess)|BIT(clang::CodeCompletionContext::Kind::CCC_ArrowMemberAccess))) goto t0_n2; + t0_n1: Score += 1.0; goto tree_1; + t0_n2: Score += 3.0; goto tree_1; + + tree_1: // No such tree. + return Score; +} +} // namespace clang. +} // namespace clangd. diff --git a/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h @@ -0,0 +1,29 @@ +#ifndef GENERATED_CODE_COMPLETION_MODEL_DECISION_FOREST_RUNTIME_TEST_H +#define GENERATED_CODE_COMPLETION_MODEL_DECISION_FOREST_RUNTIME_TEST_H + #include + +namespace ns1 { +namespace ns2 { +namespace test { + +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> orderEncode(a) < orderEncode(b). +uint32_t OrderEncode(float F); + +class Example { +public: + void SetANumber(float V) { ANumber = OrderEncode(V); } + void SetAFloat(float V) { AFloat = OrderEncode(V); } + void SetACategorical(unsigned V) { ACategorical = 1< +#include + +#include "DecisionForestRuntimeTest.h" +#include "llvm/ADT/bit.h" +#include "model/CategoricalFeature.h" + +#define BIT(X) (1<::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. 
+ return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + tree_0: + t0_n0: if(E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if(E.ACategorical & (BIT(ns1::ns2::TestEnum::A)|BIT(ns1::ns2::TestEnum::C))) goto t0_n3; + t0_n2: Score += -4.0; goto tree_1; + t0_n3: Score += 3.0; goto tree_1; + t0_n4: if(E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto tree_1; + t0_n6: Score += 10.0; goto tree_1; + + tree_1: + t1_n0: if(E.ACategorical & (BIT(ns1::ns2::TestEnum::A)|BIT(ns1::ns2::TestEnum::B))) goto t1_n2; + t1_n1: Score += -6.0; goto tree_2; + t1_n2: Score += 5.0; goto tree_2; + + tree_2: // No such tree. + return Score; +} +} // namespace ns1. +} // namespace ns2. +} // namespace test. diff --git a/clang-tools-extra/clangd/model/features.json b/clang-tools-extra/clangd/model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/model/features.json @@ -0,0 +1,8 @@ +[ + { + "name": "ContextKind", + "type": "CATEGORICAL", + "enum": "clang::CodeCompletionContext::Kind", + "header": "clang/Sema/CodeCompleteConsumer.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/model/forest.json b/clang-tools-extra/clangd/model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/model/forest.json @@ -0,0 +1,18 @@ +[ + { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "CCC_DotMemberAccess", + "CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": 1.0 + } + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -28,6 +28,10 @@ set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp) endif() +set(DF_COMPILER 
${CMAKE_CURRENT_SOURCE_DIR}/../CompletionModelCodegen.py) +include(${CMAKE_CURRENT_SOURCE_DIR}/../CompletionModel.cmake) +df_compile(${CMAKE_CURRENT_SOURCE_DIR}/model DecisionForestRuntimeTest ns1::ns2::test::Example) + add_custom_target(ClangdUnitTests) add_unittest(ClangdUnitTests ClangdTests Annotations.cpp @@ -44,6 +48,7 @@ ConfigCompileTests.cpp ConfigProviderTests.cpp ConfigYAMLTests.cpp + DecisionForestTests.cpp DexTests.cpp DiagnosticsTests.cpp DraftStoreTests.cpp @@ -87,6 +92,7 @@ TweakTesting.cpp URITests.cpp XRefsTests.cpp + ${GENERATED_CC} support/CancellationTests.cpp support/ContextTests.cpp @@ -101,6 +107,10 @@ $ ) +target_include_directories(ClangdTests PUBLIC + $ +) + clang_target_link_libraries(ClangdTests PRIVATE clangAST diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -10,6 +10,7 @@ #include "ClangdServer.h" #include "CodeComplete.h" #include "Compiler.h" +#include "CompletionModel.h" #include "Matchers.h" #include "Protocol.h" #include "Quality.h" @@ -47,6 +48,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::UnorderedElementsAre; +using ContextKind = CodeCompletionContext::Kind; // GMock helpers for matching completion items. 
MATCHER_P(Named, Name, "") { return arg.Name == Name; } @@ -161,6 +163,16 @@ return S; } +TEST(DecisionForestRuntime, SanityTest) { + using Example = clangd::Example; + using clangd::Evaluate; + Example E1; + E1.SetContextKind(ContextKind::CCC_ArrowMemberAccess); + Example E2; + E2.SetContextKind(ContextKind::CCC_SymbolOrNewName); + EXPECT_GT(Evaluate(E1), Evaluate(E2)); +} + TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; Opts.Limit = 2; diff --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp @@ -0,0 +1,32 @@ + +#include "DecisionForestRuntimeTest.h" +#include "model/CategoricalFeature.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +// Checks the runtime generated from unittests/model against scores worked out +// by hand from forest.json. +TEST(DecisionForestRuntime, Evaluate) { + using Example = ::ns1::ns2::test::Example; + using Cat = ::ns1::ns2::TestEnum; + using ::ns1::ns2::test::Evaluate; + + Example E; + E.SetANumber(200); // True + E.SetAFloat(0); // True: +10.0 + E.SetACategorical(Cat::A); // True: +5.0 + EXPECT_EQ(Evaluate(E), 15.0); + + E.SetANumber(200); // True + E.SetAFloat(-2.5); // False: -20.0 + E.SetACategorical(Cat::B); // True: +5.0 + EXPECT_EQ(Evaluate(E), -15.0); + + E.SetANumber(100); // False + E.SetACategorical(Cat::C); // True: +3.0, False: -6.0 + EXPECT_EQ(Evaluate(E), -3.0); +} +} // namespace clangd +} // namespace clang \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/model/CategoricalFeature.h b/clang-tools-extra/clangd/unittests/model/CategoricalFeature.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/model/CategoricalFeature.h @@ -0,0 +1,5 @@ +namespace ns1 { +namespace ns2 { +enum TestEnum { A, B, C, D }; +} // namespace ns2 +} // namespace ns1 \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/model/features.json 
b/clang-tools-extra/clangd/unittests/model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/model/features.json @@ -0,0 +1,16 @@ +[ + { + "name": "ANumber", + "type": "NUMERICAL" + }, + { + "name": "AFloat", + "type": "NUMERICAL" + }, + { + "name": "ACategorical", + "type": "CATEGORICAL", + "enum": "ns1::ns2::TestEnum", + "header": "model/CategoricalFeature.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/model/forest.json b/clang-tools-extra/clangd/unittests/model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/model/forest.json @@ -0,0 +1,52 @@ +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] \ No newline at end of file