diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -28,6 +28,9 @@ FrontendOpenMP Option ) + +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example) if(MSVC AND NOT CLANG_CL) set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant @@ -77,6 +80,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp index/Background.cpp index/BackgroundIndexLoader.cpp @@ -117,6 +121,11 @@ omp_gen ) +# Include generated ComletionModel headers. +target_include_directories(clangDaemon PUBLIC + $ +) + clang_target_link_libraries(clangDaemon PRIVATE clangAST diff --git a/clang-tools-extra/clangd/for-review-only/CompletionModel.h b/clang-tools-extra/clangd/for-review-only/CompletionModel.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/CompletionModel.h @@ -0,0 +1,23 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#define GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#include + +namespace clang { +namespace clangd { +class Example { +public: + void setContextKind(unsigned V) { ContextKind = 1 << V; } + +private: + uint32_t ContextKind = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). 
+ static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; + +float Evaluate(const Example&); +} // namespace clangd +} // namespace clang +#endif // GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H diff --git a/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp b/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp @@ -0,0 +1,39 @@ +#include +#include + +#include "CompletionModel.h" +#include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/ADT/bit.h" + +#define BIT(X) (1 << X) + +namespace clang { +namespace clangd { + +using ContextKind_type = clang::CodeCompletionContext::Kind; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if (E.ContextKind & (BIT(ContextKind_type::CCC_DotMemberAccess)|BIT(ContextKind_type::CCC_ArrowMemberAccess))) goto t0_n2; + t0_n1: Score += 1.0; goto t1; + t0_n2: Score += 3.0; goto t1; + + t1: // No such tree. 
+ return Score; +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h @@ -0,0 +1,29 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#define GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#include + +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ANumber = OrderEncode(V); } + void setAFloat(float V) { AFloat = OrderEncode(V); } + void setACategorical(unsigned V) { ACategorical = 1 << V; } + +private: + uint32_t ANumber = 0; + uint32_t AFloat = 0; + uint32_t ACategorical = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; + +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +#endif // GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H diff --git a/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp @@ -0,0 +1,50 @@ +#include +#include + +#include "DecisionForestRuntimeTest.h" +#include "decision_forest_model/CategoricalFeature.h" +#include "llvm/ADT/bit.h" + +#define BIT(X) (1 << X) + +namespace ns1 { +namespace ns2 { +namespace test { + +using ACategorical_type = ns1::ns2::TestEnum; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. 
+ uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if (E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if (E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::C))) goto t0_n3; + t0_n2: Score += -4.0; goto t1; + t0_n3: Score += 3.0; goto t1; + t0_n4: if (E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto t1; + t0_n6: Score += 10.0; goto t1; + + t1: + t1_n0: if (E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::B))) goto t1_n2; + t1_n1: Score += -6.0; goto t2; + t1_n2: Score += 5.0; goto t2; + + t2: // No such tree. + return Score; +} +} // namespace test +} // namespace ns2 +} // namespace ns1 diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake b/clang-tools-extra/clangd/quality/CompletionModel.cmake new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake @@ -0,0 +1,36 @@ +# Run the Completion Model Codegenerator on the model present in the +# ${model} directory. +# Produces a pair of files called ${filename}.h and ${filename}.cpp in the +# ${CMAKE_CURRENT_BINARY_DIR}. The generated header +# will define a C++ class called ${cpp_class} - which may be a +# namespace-qualified class name. 
+function(gen_decision_forest model filename cpp_class)
+  set(model_compiler ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py)  # NOTE(review): assumes the top-level source dir is a sibling of clang-tools-extra - confirm for standalone builds.
+
+  set(header_file ${CMAKE_CURRENT_BINARY_DIR}/${filename}.h)  # Must be the directory passed as --output_dir below.
+  set(cpp_file ${CMAKE_CURRENT_BINARY_DIR}/${filename}.cpp)
+
+  add_custom_command(OUTPUT ${header_file} ${cpp_file}
+    COMMAND "${Python3_EXECUTABLE}" ${model_compiler}
+      --model ${model}
+      --output_dir ${CMAKE_CURRENT_BINARY_DIR}
+      --filename ${filename}
+      --cpp_class ${cpp_class}
+    COMMENT "Generating code completion model runtime..."
+    DEPENDS ${model_compiler} ${model}/forest.json ${model}/features.json
+    VERBATIM )
+
+  set_source_files_properties(${header_file} PROPERTIES
+    GENERATED 1)
+  set_source_files_properties(${cpp_file} PROPERTIES
+    GENERATED 1)
+
+  # Disable unused label warning for generated files.
+  if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set_source_files_properties(${cpp_file} PROPERTIES
+      COMPILE_FLAGS /wd4102)
+  else()
+    set_source_files_properties(${cpp_file} PROPERTIES
+      COMPILE_FLAGS -Wno-unused)
+  endif()
+endfunction()
diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
@@ -0,0 +1,283 @@
+"""Code generator for Code Completion Model Inference.
+
+Tool runs on the Decision Forest model defined in {model} directory.
+It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp
+The generated files define the Example class named {cpp_class} having all the features as class members.
+The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate.
+"""
+
+import argparse
+import json
+import struct
+from enum import Enum
+
+
+class CppClass:
+    """Splits a possibly namespace-qualified C++ class name into its namespaces (`ns`) and class name (`name`)."""
+
+    def __init__(self, cpp_class):
+        ns_and_class = cpp_class.split("::")
+        self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0]  # Drop empty components, e.g. from a leading "::".
+        self.name = ns_and_class[-1]
+        if len(self.name) == 0:
+            raise ValueError("Empty class name.")
+
+    def ns_begin(self):
+        """Returns the `namespace X {` lines, outermost namespace first, joined by newlines."""
+        open_ns = [f"namespace {ns} {{" for ns in self.ns]
+        return "\n".join(open_ns)
+
+    def ns_end(self):
+        """Returns the closing-brace lines for the namespaces, innermost first, joined by newlines."""
+        close_ns = [
+            f"}} // namespace {ns}" for ns in reversed(self.ns)]
+        return "\n".join(close_ns)
+
+
+def header_guard(filename):
+    '''Returns the include-guard macro name for the generated header (filename is upper-cased).'''
+    return f"GENERATED_DECISION_FOREST_MODEL_{filename.upper()}_H"
+
+
+def boost_node(n, label, next_label):
+    """Returns code snippet for a leaf/boost node.
+    Adds the leaf's score to Score and jumps to next_label (the root of the next tree)."""
+    return f"{label}: Score += {n['score']}; goto {next_label};"
+
+
+def if_greater_node(n, label, next_label):
+    """Returns code snippet for an if_greater node.
+    Jumps to next_label if the Example feature (NUMBER) is greater than or equal to the threshold.
+    Comparing integers is much faster than comparing floats. Assuming floating points
+    are represented as IEEE 754, the threshold is order-encoded to an integer before it is compared.
+    Control falls through if condition is evaluated to false."""
+    threshold = n["threshold"]
+    return f"{label}: if (E.{n['feature']} >= {order_encode(threshold)} /*{threshold}*/) goto {next_label};"
+
+
+def if_member_node(n, label, next_label):
+    """Returns code snippet for an if_member node.
+    Jumps to next_label if the Example feature (ENUM) is present in the set of enum values
+    described in the node.
+ Control falls through if condition is evaluated to false.""" + members = '|'.join([ + f"BIT({n['feature']}_type::{member})" + for member in n["set"] + ]) + return f"{label}: if (E.{n['feature']} & ({members})) goto {next_label};" + + +def node(n, label, next_label): + """Returns code snippet for the node.""" + return { + 'boost': boost_node, + 'if_greater': if_greater_node, + 'if_member': if_member_node, + }[n['operation']](n, label, next_label) + + +def tree(t, tree_num: int, node_num: int): + """Returns code for inferencing a Decision Tree. + Also returns the size of the decision tree. + + A tree starts with its label `t{tree#}`. + A node of the tree starts with label `t{tree#}_n{node#}`. + + The tree contains two types of node: Conditional node and Leaf node. + - Conditional node evaluates a condition. If true, it jumps to the true node/child. + Code is generated using pre-order traversal of the tree considering + false node as the first child. Therefore the false node is always the + immediately next label. + - Leaf node adds the value to the score and jumps to the next tree. + """ + label = f"t{tree_num}_n{node_num}" + code = [] + if node_num == 0: + code.append(f"t{tree_num}:") + + if t["operation"] == "boost": + code.append(node(t, label=label, next_label=f"t{tree_num+1}")) + return code, 1 + + false_code, false_size = tree( + t['else'], tree_num=tree_num, node_num=node_num+1) + + true_node_num = node_num+false_size+1 + true_label = f"t{tree_num}_n{true_node_num}" + + true_code, true_size = tree( + t['then'], tree_num=tree_num, node_num=true_node_num) + + code.append(node(t, label=label, next_label=true_label)) + + return code+false_code+true_code, 1+false_size+true_size + + +def gen_header_code(features_json: list, cpp_class, filename: str): + """Returns code for header declaring the inference runtime. + + Declares the Example class named {cpp_class} inside relevant namespaces. + The Example class contains all the features as class members. 
This + class can be used to represent a code completion candidate. + Provides `float Evaluate()` function which can be used to score the Example. + """ + setters = [] + for f in features_json: + feature = f["name"] + if f["kind"] == "NUMBER": + # Floats are order-encoded to integers for faster comparison. + setters.append( + f"void set{feature}(float V) {{ {feature} = OrderEncode(V); }}") + elif f["kind"] == "ENUM": + setters.append( + f"void set{feature}(unsigned V) {{ {feature} = 1 << V; }}") + else: + raise ValueError("Unhandled feature type.", f["kind"]) + + # Class members represent all the features of the Example. + class_members = [f"uint32_t {f['name']} = 0;" for f in features_json] + + nline = "\n " + guard = header_guard(filename) + return f"""#ifndef {guard} +#define {guard} +#include + +{cpp_class.ns_begin()} +class {cpp_class.name} {{ +public: + {nline.join(setters)} + +private: + {nline.join(class_members)} + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); + friend float Evaluate(const {cpp_class.name}&); +}}; + +float Evaluate(const {cpp_class.name}&); +{cpp_class.ns_end()} +#endif // {guard} +""" + + +def order_encode(v: float): + i = struct.unpack('' + for h in ["cstring", "limits"] + ] + + # Include generated header. + qouted_headers = {f"{filename}.h", "llvm/ADT/bit.h"} + # Headers required by ENUM features used by the model. + qouted_headers |= {f["header"] + for f in features_json if f["kind"] == "ENUM"} + quoted_include = [f'#include "{h}"' for h in sorted(qouted_headers)] + + # using-decl for ENUM features. 
+ using_decls = "\n".join(f"using {feature['name']}_type = {feature['type']};" + for feature in features_json + if feature["kind"] == "ENUM") + nl = "\n" + return f"""{nl.join(angled_include)} + +{nl.join(quoted_include)} + +#define BIT(X) (1 << X) + +{cpp_class.ns_begin()} + +{using_decls} + +uint32_t {cpp_class.name}::OrderEncode(float F) {{ + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +}} + +{evaluate_func(forest_json, cpp_class)} +{cpp_class.ns_end()} +""" + + +def main(): + parser = argparse.ArgumentParser('DecisionForestCodegen') + parser.add_argument('--filename', help='output file name.') + parser.add_argument('--output_dir', help='output directory.') + parser.add_argument('--model', help='path to model directory.') + parser.add_argument( + '--cpp_class', + help='The name of the class (which may be a namespace-qualified) created in generated header.' 
+ ) + ns = parser.parse_args() + + output_dir = ns.output_dir + filename = ns.filename + header_file = f"{output_dir}/{filename}.h" + cpp_file = f"{output_dir}/{filename}.cpp" + cpp_class = CppClass(cpp_class=ns.cpp_class) + + model_file = f"{ns.model}/forest.json" + features_file = f"{ns.model}/features.json" + + with open(features_file) as f: + features_json = json.load(f) + + with open(model_file) as m: + forest_json = json.load(m) + + with open(cpp_file, 'w+t') as output_cc: + output_cc.write( + gen_cpp_code(forest_json=forest_json, + features_json=features_json, + filename=filename, + cpp_class=cpp_class)) + + with open(header_file, 'w+t') as output_h: + output_h.write(gen_header_code( + features_json=features_json, cpp_class=cpp_class, filename=filename)) + + +if __name__ == '__main__': + main() diff --git a/clang-tools-extra/clangd/quality/README.md b/clang-tools-extra/clangd/quality/README.md new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/README.md @@ -0,0 +1,220 @@ +# Decision Forest Code Completion Model + +## Decision Forest +A **decision forest** is a collection of many decision trees. A **decision tree** is a full binary tree that provides a quality prediction for an input (code completion item). Internal nodes represent a **binary decision** based on the input data, and leaf nodes represent a prediction. + +In order to predict the relevance of a code completion item, we traverse each of the decision trees beginning with their roots until we reach a leaf. + +An input (code completion candidate) is characterized as a set of **features**, such as the *type of symbol* or the *number of existing references*. + +At every non-leaf node, we evaluate the condition to decide whether to go left or right. The condition compares one **feature** of the input against a constant. The condition can be of two types: +- **if_greater**: Checks whether a numerical feature is **>=** a **threshold**.
+- **if_member**: Checks whether the **enum** feature is contained in the **set** defined in the node. + +A leaf node contains the value **score**. +To compute an overall **quality** score, we traverse each tree in this way and add up the scores. + +## Model Input Format +The input model is represented in json format. + +### Features +The file **features.json** defines the features available to the model. +It is a json list of features. The features can be of the following two kinds. + +#### Number +``` +{ + "name": "a_numerical_feature", + "kind": "NUMBER" +} +``` +#### Enum +``` +{ + "name": "an_enum_feature", + "kind": "ENUM", + "type": "fully::qualified::enum", + "header": "path/to/HeaderDeclaringEnum.h" +} +``` +The field `type` specifies the fully qualified name of the enum. +The enum can have at most **32** values, because each value is encoded as one bit of a `uint32_t`. + +The field `header` specifies the header containing the declaration of the enum. +This header is included by the inference runtime. + + +### Decision Forest +The file `forest.json` defines the decision forest. It is a json list of **DecisionTree**. + +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**, **LeafNode**.
+#### IfGreaterNode +``` +{ + "operation": "if_greater", + "feature": "a_numerical_feature", + "threshold": A real number, + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### IfMemberNode +``` +{ + "operation": "if_member", + "feature": "an_enum_feature", + "set": ["enum_value1", "enum_value2", ...], + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### LeafNode +``` +{ + "operation": "boost", + "score": A real number +} +``` + +## Code Generator for Inference +The implementation of the inference runtime is split across: + +### Code generator +The code generator `CompletionModelCodegen.py` takes the `${model}` directory as input and generates the inference library: +- `${output_dir}/${filename}.h` +- `${output_dir}/${filename}.cpp` + +Invocation +``` +python3 CompletionModelCodegen.py \ + --model path/to/model/dir \ + --output_dir path/to/output/dir \ + --filename OutputFileName \ + --cpp_class clang::clangd::YourExampleClass +``` +### Build System +`CompletionModel.cmake` provides the `gen_decision_forest` function. +A client intending to use the CompletionModel for inference can use this to trigger the code generator and generate the inference library. +It can then use the generated API by including and depending on this library. + +### Generated API for inference +The code generator defines the Example `class` inside the relevant namespaces, as specified by the option `${cpp_class}`. + +The members of this generated class comprise all the features mentioned in `features.json`. +Thus this class can represent a code completion candidate that needs to be scored. + +The API also provides `float Evaluate(const MyClass&)` which can be used to score the completion candidate.
+ + +## Example +### model/features.json +``` +[ + { + "name": "ANumber", + "kind": "NUMBER" + }, + { + "name": "AFloat", + "kind": "NUMBER" + }, + { + "name": "ACategorical", + "kind": "ENUM", + "type": "ns1::ns2::TestEnum", + "header": "model/CategoricalFeature.h" + } +] +``` +### model/forest.json +``` +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] +``` +### DecisionForestRuntime.h +``` +... +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ... } + void setAFloat(float V) { ... } + void setACategorical(unsigned V) { ... } + +private: + ... +}; + +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +``` + +### CMake Invocation +In order to use the inference runtime, one can use the `gen_decision_forest` function +defined in `CompletionModel.cmake`, which invokes `CompletionModelCodegen.py` with the appropriate arguments. + +For example, the following invocation reads the model present in `path/to/model` and creates +`${CMAKE_CURRENT_BINARY_DIR}/myfilename.h` and `${CMAKE_CURRENT_BINARY_DIR}/myfilename.cpp` +describing a `class` named `MyClass` in namespace `fully::qualified`.
+ + + +``` +gen_decision_forest(path/to/model + myfilename + ::fully::qualified::MyClass) +``` \ No newline at end of file diff --git a/clang-tools-extra/clangd/quality/model/features.json b/clang-tools-extra/clangd/quality/model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/model/features.json @@ -0,0 +1,8 @@ +[ + { + "name": "ContextKind", + "kind": "ENUM", + "type": "clang::CodeCompletionContext::Kind", + "header": "clang/Sema/CodeCompleteConsumer.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/quality/model/forest.json b/clang-tools-extra/clangd/quality/model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/model/forest.json @@ -0,0 +1,18 @@ +[ + { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "CCC_DotMemberAccess", + "CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": 1.0 + } + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -28,6 +28,9 @@ set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp) endif() +include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model DecisionForestRuntimeTest ::ns1::ns2::test::Example) + add_custom_target(ClangdUnitTests) add_unittest(ClangdUnitTests ClangdTests Annotations.cpp @@ -44,6 +47,7 @@ ConfigCompileTests.cpp ConfigProviderTests.cpp ConfigYAMLTests.cpp + DecisionForestTests.cpp DexTests.cpp DiagnosticsTests.cpp DraftStoreTests.cpp @@ -88,6 +92,7 @@ TweakTesting.cpp URITests.cpp XRefsTests.cpp + ${CMAKE_CURRENT_BINARY_DIR}/DecisionForestRuntimeTest.cpp support/CancellationTests.cpp support/ContextTests.cpp @@ -102,6 +107,11 @@ $ )
+# Include generated ComletionModel headers. +target_include_directories(ClangdTests PUBLIC + $ +) + clang_target_link_libraries(ClangdTests PRIVATE clangAST diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -10,6 +10,7 @@ #include "ClangdServer.h" #include "CodeComplete.h" #include "Compiler.h" +#include "CompletionModel.h" #include "Matchers.h" #include "Protocol.h" #include "Quality.h" @@ -47,6 +48,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::UnorderedElementsAre; +using ContextKind = CodeCompletionContext::Kind; // GMock helpers for matching completion items. MATCHER_P(Named, Name, "") { return arg.Name == Name; } @@ -161,6 +163,16 @@ return S; } +TEST(DecisionForestRuntime, SanityTest) { + using Example = clangd::Example; + using clangd::Evaluate; + Example E1; + E1.setContextKind(ContextKind::CCC_ArrowMemberAccess); + Example E2; + E2.setContextKind(ContextKind::CCC_SymbolOrNewName); + EXPECT_GT(Evaluate(E1), Evaluate(E2)); +} + TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; Opts.Limit = 2; diff --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp @@ -0,0 +1,29 @@ +#include "DecisionForestRuntimeTest.h" +#include "decision_forest_model/CategoricalFeature.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +TEST(DecisionForestRuntime, Evaluate) { + using Example = ::ns1::ns2::test::Example; + using Cat = ::ns1::ns2::TestEnum; + using ::ns1::ns2::test::Evaluate; + + Example E; + E.setANumber(200); // True + E.setAFloat(0); // True: +10.0 + E.setACategorical(Cat::A); // True: +5.0 + EXPECT_EQ(Evaluate(E), 15.0); + + 
E.setANumber(200); // True + E.setAFloat(-2.5); // False: -20.0 + E.setACategorical(Cat::B); // True: +5.0 + EXPECT_EQ(Evaluate(E), -15.0); + + E.setANumber(100); // False + E.setACategorical(Cat::C); // True: +3.0, False: -6.0 + EXPECT_EQ(Evaluate(E), -3.0); +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h @@ -0,0 +1,5 @@ +namespace ns1 { +namespace ns2 { +enum TestEnum { A, B, C, D }; +} // namespace ns2 +} // namespace ns1 diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json @@ -0,0 +1,16 @@ +[ + { + "name": "ANumber", + "kind": "NUMBER" + }, + { + "name": "AFloat", + "kind": "NUMBER" + }, + { + "name": "ACategorical", + "kind": "ENUM", + "type": "ns1::ns2::TestEnum", + "header": "decision_forest_model/CategoricalFeature.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json @@ -0,0 +1,52 @@ +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + 
"else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] \ No newline at end of file