diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -28,6 +28,9 @@ FrontendOpenMP Option ) + +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example) if(MSVC AND NOT CLANG_CL) set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant @@ -77,6 +80,7 @@ TUScheduler.cpp URI.cpp XRefs.cpp + ${DECISION_FOREST_OUTPUT_DIR}/CompletionModel.cpp index/Background.cpp index/BackgroundIndexLoader.cpp @@ -117,6 +121,10 @@ omp_gen ) +target_include_directories(clangDaemon PUBLIC + $ +) + clang_target_link_libraries(clangDaemon PRIVATE clangAST diff --git a/clang-tools-extra/clangd/for-review-only/CompletionModel.h b/clang-tools-extra/clangd/for-review-only/CompletionModel.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/CompletionModel.h @@ -0,0 +1,22 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#define GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H +#include + +namespace clang { +namespace clangd { +class Example { +public: + void setContextKind(unsigned V) { ContextKind = 1 << V; } + +private: + uint32_t ContextKind = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). 
+ static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace clangd +} // namespace clang +#endif // GENERATED_DECISION_FOREST_MODEL_COMPLETIONMODEL_H \ No newline at end of file diff --git a/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp b/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/CompletionModel.cpp @@ -0,0 +1,39 @@ +#include +#include + +#include "CompletionModel.h" +#include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/ADT/bit.h" + +#define BIT(X) (1 << X) + +namespace clang { +namespace clangd { + +using ContextKind_type = clang::CodeCompletionContext::Kind; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if (E.ContextKind & (BIT(ContextKind_type::CCC_DotMemberAccess)|BIT(ContextKind_type::CCC_ArrowMemberAccess))) goto t0_n2; + t0_n1: Score += 1.0; goto t1; + t0_n2: Score += 3.0; goto t1; + + t1: // No such tree. 
+ return Score; +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.h @@ -0,0 +1,28 @@ +#ifndef GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#define GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#include + +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ANumber = OrderEncode(V); } + void setAFloat(float V) { AFloat = OrderEncode(V); } + void setACategorical(unsigned V) { ACategorical = 1 << V; } + +private: + uint32_t ANumber = 0; + uint32_t AFloat = 0; + uint32_t ACategorical = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +#endif // GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H \ No newline at end of file diff --git a/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/for-review-only/DecisionForestRuntimeTest.cpp @@ -0,0 +1,50 @@ +#include +#include + +#include "llvm/ADT/bit.h" +#include "decision_forest_model/CategoricalFeature.h" +#include "DecisionForestRuntimeTest.h" + +#define BIT(X) (1 << X) + +namespace ns1 { +namespace ns2 { +namespace test { + +using ACategorical_type = ns1::ns2::TestEnum; + +uint32_t Example::OrderEncode(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. 
Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if (E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if (E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::C))) goto t0_n3; + t0_n2: Score += -4.0; goto t1; + t0_n3: Score += 3.0; goto t1; + t0_n4: if (E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto t1; + t0_n6: Score += 10.0; goto t1; + + t1: + t1_n0: if (E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::B))) goto t1_n2; + t1_n1: Score += -6.0; goto t2; + t1_n2: Score += 5.0; goto t2; + + t2: // No such tree. + return Score; +} +} // namespace test +} // namespace ns2 +} // namespace ns1 diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake b/clang-tools-extra/clangd/quality/CompletionModel.cmake new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake @@ -0,0 +1,42 @@ +# Run the Completion Model Codegenerator on the model present in the +# ${model} directory. +# Produces a pair of files called ${filename}.h and ${filename}.cpp in the +# ${CMAKE_BINARY_DIR}/generated/decision_forest. The generated header +# will define a C++ class called ${cpp_class} - which may be a +# namespace-qualified class name. 
+function(gen_decision_forest model filename cpp_class) + set(model_compiler ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py) + + set(model_json ${model}/forest.json) + set(model_features ${model}/features.json) + + set(output_dir ${CMAKE_BINARY_DIR}/generated/decision_forest) + set(header_file ${output_dir}/${filename}.h) + set(cpp_file ${output_dir}/${filename}.cpp) + + add_custom_command(OUTPUT ${header_file} ${cpp_file} + COMMAND "${Python3_EXECUTABLE}" ${model_compiler} + --model ${model} + --output_dir ${output_dir} + --filename ${filename} + --cpp_class ${cpp_class} + COMMENT "Generating code completion model runtime..." + DEPENDS ${model_compiler} ${model_json} ${model_features} + VERBATIM ) + + set_source_files_properties(${header_file} PROPERTIES + GENERATED 1) + set_source_files_properties(${cpp_file} PROPERTIES + GENERATED 1) + + # Disable unused label warning for generated files. + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set_source_files_properties(${cpp_file} PROPERTIES + COMPILE_FLAGS /wd4102) + else() + set_source_files_properties(${cpp_file} PROPERTIES + COMPILE_FLAGS -Wno-unused) + endif() + + set(DECISION_FOREST_OUTPUT_DIR ${output_dir} PARENT_SCOPE) +endfunction() diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py @@ -0,0 +1,291 @@ +"""Code generator for Code Completion Model Inference. + +Tool runs on the Decision Forest model defined in {model} directory. +It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp +The generated files defines the Example class named {cpp_class} having all the features as class members. +The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate. 
+""" + +import argparse +import json +import struct +from enum import Enum + + +class CppClass: + """Holds class name and names of the enclosing namespaces.""" + + def __init__(self, cpp_class): + ns_and_class = cpp_class.split("::") + self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0] + self.name = ns_and_class[-1] + if len(self.name) == 0: + raise ValueError("Empty class name.") + + def ns_begin(self): + """Returns snippet for opening namespace declarations.""" + open_ns = [f"namespace {ns} {{" for ns in self.ns] + return "\n".join(open_ns) + + def ns_end(self): + """Returns snippet for closing namespace declarations.""" + close_ns = [ + f"}} // namespace {ns}" for ns in reversed(self.ns)] + return "\n".join(close_ns) + + +def header_guard(filename): + '''Returns the header guard for the generated header.''' + return f"GENERATED_DECISION_FOREST_MODEL_{filename.upper()}_H" + + +def boost_node(n, label, next_label): + """Returns code snippet for a leaf/boost node. + Adds value of leaf to the score and jumps to the root of the next tree.""" + return f"{label}: Score += {n['score']}; goto {next_label};" + + +def if_greater_node(n, label, next_label): + """Returns code snippet for a if_greater node. + Jumps to true_label if the Example feature (NUMBER) is greater than the threshold. + Comparing integers is much faster than comparing floats. Assuming floating points + are represented as IEEE 754, it order-encodes the floats to integers before comparing them. + Control falls through if condition is evaluated to false.""" + threshold = n["threshold"] + return f"{label}: if (E.{n['feature']} >= {order_encode(threshold)} /*{threshold}*/) goto {next_label};" + + +def if_member_node(n, label, next_label): + """Returns code snippet for a if_member node. + Jumps to true_label if the Example feature (ENUM) is present in the set of enum values + described in the node. 
+ Control falls through if condition is evaluated to false.""" + members = '|'.join([ + f"BIT({n['feature']}_type::{member})" + for member in n["set"] + ]) + return f"{label}: if (E.{n['feature']} & ({members})) goto {next_label};" + + +def node(n, label, next_label): + """Returns code snippet for the node.""" + return { + 'boost': boost_node, + 'if_greater': if_greater_node, + 'if_member': if_member_node, + }[n['operation']](n, label, next_label) + + +def tree(t, tree_num: int, node_num: int): + """Returns code for inferencing a Decision Tree. + Also returns the size of the decision tree. + + A tree starts with its label `t{tree#}`. + A node of the tree starts with label `t{tree#}_n{node#}`. + + The tree contains two types of node: Conditional node and Leaf node. + - Conditional node evaluates a condition. If true, it jumps to the true node/child. + Code is generated using pre-order traversal of the tree considering + false node as the first child. Therefore the false node is always the + immediately next label. + - Leaf node adds the value to the score and jumps to the next tree. + """ + label = f"t{tree_num}_n{node_num}" + code = [] + if node_num == 0: + code.append(f"t{tree_num}:") + + if t["operation"] == "boost": + code.append(node(t, label=label, next_label=f"t{tree_num+1}")) + return code, 1 + + false_code, false_size = tree( + t['else'], tree_num=tree_num, node_num=node_num+1) + + true_node_num = node_num+false_size+1 + true_label = f"t{tree_num}_n{true_node_num}" + + true_code, true_size = tree( + t['then'], tree_num=tree_num, node_num=true_node_num) + + code.append(node(t, label=label, next_label=true_label)) + + return code+false_code+true_code, 1+false_size+true_size + + +def gen_header_code(features_json: list, cpp_class, filename: str): + """Returns code for header declaring the inference runtime. + + Declares the Example class named {cpp_class} inside relevant namespaces. + The Example class contains all the features as class members. 
This + class can be used to represent a code completion candidate. + Provides `float Evaluate()` function which can be used to score the Example. + """ + # Header guard + code = """#ifndef {guard} +#define {guard} +#include + +""".format(guard=header_guard(filename)) + + # Namespace begin + code += cpp_class.ns_begin() + "\n" + + setters = [] + for f in features_json: + if f["type"] == "NUMBER": + # Floats are order-encoded to integers for faster comparison. + setters.append("void set{feature}(float V) {{ {feature} = OrderEncode(V); }}".format( + feature=f["name"])) + elif f["type"] == "ENUM": + setters.append("void set{feature}(unsigned V) {{ {feature} = 1 << V; }}".format( + feature=f["name"])) + else: + raise ValueError("Unhandled feature type.", f["type"]) + + # Class members represent all the features of the Example. + class_members = [f"uint32_t {f['name']} = 0;" for f in features_json] + + # Class definition. + code += f"class {cpp_class.name} {{\n" + code += "public:\n" + code += " " + "\n ".join(setters) + "\n" + code += "\n" + code += "private:\n" + code += " " + "\n ".join(class_members) + "\n" + + code += """ + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); +""" + code += " friend float Evaluate(const {}&);\n".format(cpp_class.name) + code += "};\n" + code += "float Evaluate(const {}&);".format(cpp_class.name) + "\n" + + # Namespace end and Header guard. + code += cpp_class.ns_end() + "\n" + code += f"#endif // {header_guard(filename)}" + return code + + +def order_encode(v: float): + i = struct.unpack('' for h in angled_include) + "\n\n" + code += "\n".join(f'#include "{h}"' for h in quoted_include) + "\n\n" + code += "#define BIT(X) (1 << X)\n\n" + + # Namespaces Begin. + code += cpp_class.ns_begin() + "\n" + + # using-decl for ENUM features. 
+ code += "\n" + "\n".join(f"using {feature['name']}_type = {feature['enum']};" + for feature in features_json + if feature["type"] == "ENUM") + "\n" + + # Float order encoding. + code += f""" +uint32_t {cpp_class.name}::OrderEncode(float F) {{ + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::bit_cast(F); + std::memcpy(&U, &F, sizeof(U)); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +}} + +""" + + code += evaluate_func(forest_json, cpp_class) + "\n" + # Namespaces End. + code += cpp_class.ns_end() + "\n" + return code + + +def main(): + parser = argparse.ArgumentParser('DecisionForestCodegen') + parser.add_argument('--filename', help='output file name.') + parser.add_argument('--output_dir', help='output directory.') + parser.add_argument('--model', help='path to model directory.') + parser.add_argument( + '--cpp_class', + help='The name of the class (which may be a namespace-qualified) created in generated header.' 
+ ) + ns = parser.parse_args() + + output_dir = ns.output_dir + filename = ns.filename + header_file = f"{output_dir}/{filename}.h" + cpp_file = f"{output_dir}/{filename}.cpp" + cpp_class = CppClass(cpp_class=ns.cpp_class) + + model_file = f"{ns.model}/forest.json" + features_file = f"{ns.model}/features.json" + + with open(features_file) as f: + features_json = json.load(f) + + with open(model_file) as m: + forest_json = json.load(m) + + with open(cpp_file, 'w+t') as output_cc: + output_cc.write( + gen_cpp_code(forest_json=forest_json, + features_json=features_json, + filename=filename, + cpp_class=cpp_class)) + + with open(header_file, 'w+t') as output_h: + output_h.write(gen_header_code( + features_json=features_json, cpp_class=cpp_class, filename=filename)) + + +if __name__ == '__main__': + main() diff --git a/clang-tools-extra/clangd/quality/README.md b/clang-tools-extra/clangd/quality/README.md new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/README.md @@ -0,0 +1,245 @@ +# Decision Forest Code Completion Model + +## Decision Forest +A **decision forest** is a collection of many decision trees. A **decision tree** is a full binary tree where every non-leaf node has exactly **2** children. + +In order to predict the relevance of a code completion item, we traverse each of the decision trees beginning with their roots until we reach a leaf. + +At every non-leaf node, we evaluate the condition present in the node. The condition refers to exactly one **feature**. It uses the value of this attribute from the code completion item to evaluate the condition. +Based on the condition, we move to its true child or the false child. + +The condition can be of two types: +- **if_greater**: Check whether a numerical feature is **>=** the **threshold**. +- **if_member**: Check whether the **enum** feature is contained in the **set** defined in the node. + +A leaf node only contains the value **score**. 
+Once we know the set of leaves (one from each decision tree), we add the **score** values from each of the leaves to get the final relevance score. + +## Model Input Format +The input model is represented in JSON format. +### Features +The file **features.json** defines the features available to the model. It is a JSON list of features. Each feature is one of the following two types. +#### Number +``` +{ + "name": "a_numerical_feature", + "type": "NUMBER" +} +``` +#### Enum +``` +{ + "name": "an_enum_feature", + "type": "ENUM", + "enum": "fully::qualified::enum", + "header": "path/to/HeaderDeclaringEnum.h" +} +``` +The field `enum` specifies the fully qualified name of the enum. + +The field `header` specifies the header containing the declaration of the enum. This header is included by the inference runtime. + + +### Decision Forest +The file `forest.json` defines the decision forest. It is a JSON list of **DecisionTree**. + +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**, or **LeafNode**. +#### IfGreaterNode +``` +{ + "operation": "if_greater", + "feature": "a_numerical_feature", + "threshold": A real number, + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### IfMemberNode +``` +{ + "operation": "if_member", + "feature": "an_enum_feature", + "set": ["enum_value1", "enum_value2", ...], + "then": {A DecisionTree}, + "else": {A DecisionTree} +} +``` +#### LeafNode +``` +{ + "operation": "boost", + "score": A real number +} +``` + +## Code Generator for Inference +The implementation of the inference runtime is split across: +- Build System (CMake) +- Generated code/API for inference +- Code generator +- Input model + +### Code generator +The code generator `CompletionModelCodegen.py` takes the `${model}` directory as input and generates the inference library: +- `${output_dir}/{filename}.h` +- `${output_dir}/{filename}.cpp` + +### Build System +`CompletionModel.cmake` provides the `gen_decision_forest` function.
A client intending to use the CompletionModel for inference can use this to trigger the code generator and generate the inference library. It can then use the generated API by including and depending on this library. + +### Generated API for inference +The code generator defines the Example `class` inside the relevant namespaces, as specified in the option `${cpp_class}`. + +The members of this generated class comprise all the features mentioned in `features.json`. Thus this class can represent a code completion candidate that needs to be scored. + +The API also provides `float Evaluate(const MyClass&)`, where `MyClass` is the generated class, which can be used to score the completion candidate. + + +## Example +### model/features.json +``` +[ + { + "name": "ANumber", + "type": "NUMBER" + }, + { + "name": "AFloat", + "type": "NUMBER" + }, + { + "name": "ACategorical", + "type": "ENUM", + "enum": "ns1::ns2::TestEnum", + "header": "model/CategoricalFeature.h" + } +] +``` +### model/forest.json +``` +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } + } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] +``` +### DecisionForestRuntimeTest.h +``` +#ifndef GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#define GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +#include <cstdint> + +namespace ns1 { +namespace ns2 { +namespace test { +class Example { +public: + void setANumber(float V) { ANumber = OrderEncode(V); }
+ void setAFloat(float V) { AFloat = OrderEncode(V); } + void setACategorical(unsigned V) { ACategorical = 1 << V; } + +private: + uint32_t ANumber = 0; + uint32_t AFloat = 0; + uint32_t ACategorical = 0; + + // Produces an integer that sorts in the same order as F. + // That is: a < b <==> orderEncode(a) < orderEncode(b). + static uint32_t OrderEncode(float F); + friend float Evaluate(const Example&); +}; +float Evaluate(const Example&); +} // namespace test +} // namespace ns2 +} // namespace ns1 +#endif // GENERATED_DECISION_FOREST_MODEL_DECISIONFORESTRUNTIMETEST_H +``` +### DecisionForestRuntimeTest.cpp +``` +#include <cstring> +#include <limits> + +#include "llvm/ADT/bit.h" +#include "model/CategoricalFeature.h" +#include "DecisionForestRuntimeTest.h" + +#define BIT(X) (1 << X) + +namespace ns1 { +namespace ns2 { +namespace test { + +using ACategorical_type = ns1::ns2::TestEnum; + +uint32_t Example::OrderEncode(float F) {...} + +float Evaluate(const Example& E) { + float Score = 0; + t0: + t0_n0: if(E.ANumber >= 3276275712 /*200.0*/) goto t0_n4; + t0_n1: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::C))) goto t0_n3; + t0_n2: Score += -4.0; goto t1; + t0_n3: Score += 3.0; goto t1; + t0_n4: if(E.AFloat >= 1082130432 /*-1*/) goto t0_n6; + t0_n5: Score += -20.0; goto t1; + t0_n6: Score += 10.0; goto t1; + + t1: + t1_n0: if(E.ACategorical & (BIT(ACategorical_type::A)|BIT(ACategorical_type::B))) goto t1_n2; + t1_n1: Score += -6.0; goto t2; + t1_n2: Score += 5.0; goto t2; + + t2: // No such tree.
+ return Score; +} +} // namespace test +} // namespace ns2 +} // namespace ns1 +``` \ No newline at end of file diff --git a/clang-tools-extra/clangd/quality/model/features.json b/clang-tools-extra/clangd/quality/model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/model/features.json @@ -0,0 +1,8 @@ +[ + { + "name": "ContextKind", + "type": "ENUM", + "enum": "clang::CodeCompletionContext::Kind", + "header": "clang/Sema/CodeCompleteConsumer.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/quality/model/forest.json b/clang-tools-extra/clangd/quality/model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/quality/model/forest.json @@ -0,0 +1,18 @@ +[ + { + "operation": "if_member", + "feature": "ContextKind", + "set": [ + "CCC_DotMemberAccess", + "CCC_ArrowMemberAccess" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": 1.0 + } + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -28,6 +28,9 @@ set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp) endif() +include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake) +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model DecisionForestRuntimeTest ::ns1::ns2::test::Example) + add_custom_target(ClangdUnitTests) add_unittest(ClangdUnitTests ClangdTests Annotations.cpp @@ -44,6 +47,7 @@ ConfigCompileTests.cpp ConfigProviderTests.cpp ConfigYAMLTests.cpp + DecisionForestTests.cpp DexTests.cpp DiagnosticsTests.cpp DraftStoreTests.cpp @@ -88,6 +92,7 @@ TweakTesting.cpp URITests.cpp XRefsTests.cpp + ${DECISION_FOREST_OUTPUT_DIR}/DecisionForestRuntimeTest.cpp support/CancellationTests.cpp support/ContextTests.cpp @@ -102,6 +107,10 @@ $ ) 
+target_include_directories(ClangdTests PUBLIC + $ +) + clang_target_link_libraries(ClangdTests PRIVATE clangAST diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -10,6 +10,7 @@ #include "ClangdServer.h" #include "CodeComplete.h" #include "Compiler.h" +#include "CompletionModel.h" #include "Matchers.h" #include "Protocol.h" #include "Quality.h" @@ -47,6 +48,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::UnorderedElementsAre; +using ContextKind = CodeCompletionContext::Kind; // GMock helpers for matching completion items. MATCHER_P(Named, Name, "") { return arg.Name == Name; } @@ -161,6 +163,16 @@ return S; } +TEST(DecisionForestRuntime, SanityTest) { + using Example = clangd::Example; + using clangd::Evaluate; + Example E1; + E1.setContextKind(ContextKind::CCC_ArrowMemberAccess); + Example E2; + E2.setContextKind(ContextKind::CCC_SymbolOrNewName); + EXPECT_GT(Evaluate(E1), Evaluate(E2)); +} + TEST(CompletionTest, Limit) { clangd::CodeCompleteOptions Opts; Opts.Limit = 2; diff --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp @@ -0,0 +1,29 @@ +#include "DecisionForestRuntimeTest.h" +#include "decision_forest_model/CategoricalFeature.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +TEST(DecisionForestRuntime, Evaluate) { + using Example = ::ns1::ns2::test::Example; + using Cat = ::ns1::ns2::TestEnum; + using ::ns1::ns2::test::Evaluate; + + Example E; + E.setANumber(200); // True + E.setAFloat(0); // True: +10.0 + E.setACategorical(Cat::A); // True: +5.0 + EXPECT_EQ(Evaluate(E), 15.0); + + E.setANumber(200); // True + E.setAFloat(-2.5); // 
False: -20.0 + E.setACategorical(Cat::B); // True: +5.0 + EXPECT_EQ(Evaluate(E), -15.0); + + E.setANumber(100); // False + E.setACategorical(Cat::C); // True: +3.0, False: -6.0 + EXPECT_EQ(Evaluate(E), -3.0); +} +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h @@ -0,0 +1,5 @@ +namespace ns1 { +namespace ns2 { +enum TestEnum { A, B, C, D }; +} // namespace ns2 +} // namespace ns1 diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json @@ -0,0 +1,16 @@ +[ + { + "name": "ANumber", + "type": "NUMBER" + }, + { + "name": "AFloat", + "type": "NUMBER" + }, + { + "name": "ACategorical", + "type": "ENUM", + "enum": "ns1::ns2::TestEnum", + "header": "decision_forest_model/CategoricalFeature.h" + } +] \ No newline at end of file diff --git a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json @@ -0,0 +1,52 @@ +[ + { + "operation": "if_greater", + "feature": "ANumber", + "threshold": 200.0, + "then": { + "operation": "if_greater", + "feature": "AFloat", + "threshold": -1, + "then": { + "operation": "boost", + "score": 10.0 + }, + "else": { + "operation": "boost", + "score": -20.0 + } + }, + "else": { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "C" + ], + "then": { + "operation": "boost", + "score": 3.0 + }, + "else": { + "operation": "boost", + "score": -4.0 + } 
+ } + }, + { + "operation": "if_member", + "feature": "ACategorical", + "set": [ + "A", + "B" + ], + "then": { + "operation": "boost", + "score": 5.0 + }, + "else": { + "operation": "boost", + "score": -6.0 + } + } +] \ No newline at end of file