diff --git a/lldb/docs/use/variable.rst b/lldb/docs/use/variable.rst --- a/lldb/docs/use/variable.rst +++ b/lldb/docs/use/variable.rst @@ -1109,6 +1109,39 @@ (std::string) Q = "Hello world" } +Callback-based type matching +---------------------------- + +Even though regular expression matching works well for the vast majority of data +formatters (you normally know the name of the type you're writing a formatter +for), there are some cases where it's useful to look at the type before deciding +what formatter to apply. + +As an example scenario, imagine we have a code generator that produces some +classes that inherit from a common ``GeneratedObject`` class, and we have a +summary function and a synthetic child provider that work for all +``GeneratedObject`` instances (they all follow the same pattern). However, there +is no common pattern in the names of these classes, so we can't register the +formatter either by name or by regular expression. + +In that case, you can write a recognizer function like this: + +:: + + def is_generated_object(sbtype, internal_dict): + for base in sbtype.get_bases_array(): + if base.GetName() == "GeneratedObject": + return True + return False + +And pass this function to ``type summary add`` and ``type synthetic add`` using +the flag ``--recognizer-function``. + +:: + + (lldb) type summary add --expand --python-function my_summary_function --recognizer-function is_generated_object + (lldb) type synthetic add --python-class my_child_provider --recognizer-function is_generated_object + Objective-C Dynamic Type Discovery ---------------------------------- diff --git a/lldb/examples/synthetic/recognizer_function/example.py b/lldb/examples/synthetic/recognizer_function/example.py new file mode 100644 --- /dev/null +++ b/lldb/examples/synthetic/recognizer_function/example.py @@ -0,0 +1,63 @@ +# Formatters for classes that derive from Message. 
+# +# Usage: +# command script import ./example.py +# type summary add --expand --recognizer-function --python-function example.message_summary example.is_message_type +# type synth add --recognizer-function --python-class example.MessageChildProvider example.is_message_type + +import sys + +def is_message_type(t, internal_dict): + for base in t.get_bases_array(): + if base.GetName() == "Message": + return True + return False + +def message_summary(value, internal_dict): + # Could have used a summary string as well. All the work is done by the child + # provider. + return "Message" + +class MessageChildProvider: + def __init__(self, value, internal_dict): + self.value = value + self.synthetic_children = self._analyze_children(value) + + def has_children(self): + return self.num_children() > 0 + + def num_children(self): + return len(self.synthetic_children) + + def get_child_index(self, name): + for index, child in enumerate(self.synthetic_children): + if child.GetName() == name: + return index + return None + + def get_child_at_index(self, index): + return self.synthetic_children[index] + + def _rename_sbvalue(self, value): + # We want to display the field with its original name without a trailing + # underscore. So we create a new SBValue with the same type and address but + # a different name. + name = value.GetName() + assert name.endswith("_") + new_name = name[:-1] + return value.CreateValueFromAddress(new_name, value.GetLoadAddress(), + value.GetType()) + + def _analyze_children(self, value): + result = [] + for i in range(value.GetNumChildren()): + child = value.GetChildAtIndex(i) + child_name = child.GetName() + if child_name.startswith("_"): + continue # Internal field, skip + # Normal field. Check presence bit. 
+ presence_bit = value.GetChildMemberWithName("_has_" + child_name) + if presence_bit.GetValueAsUnsigned() != 0: + result.append(self._rename_sbvalue(child)) + return result + diff --git a/lldb/examples/synthetic/recognizer_function/lldb-commands b/lldb/examples/synthetic/recognizer_function/lldb-commands new file mode 100644 --- /dev/null +++ b/lldb/examples/synthetic/recognizer_function/lldb-commands @@ -0,0 +1,7 @@ +command script import ./example.py +type summary add --expand --recognizer-function --python-function example.message_summary example.is_message_type +type synth add --recognizer-function --python-class example.MessageChildProvider example.is_message_type +b program.cpp:112 +r +p customer +p order diff --git a/lldb/examples/synthetic/recognizer_function/program.cpp b/lldb/examples/synthetic/recognizer_function/program.cpp new file mode 100644 --- /dev/null +++ b/lldb/examples/synthetic/recognizer_function/program.cpp @@ -0,0 +1,114 @@ +// Example program for matching summary functions and synthetic child providers. +// +// The classes here simulate code generated by a serialization tool like, for +// example, protocol buffers. But the actual "generated" class layout is +// extremely naive to simplify the example. +// +// The idea is that we want to have generic formatters for a bunch of message +// classes, because they are all generated following common patterns, but the +// matching can't be based on the type name, because it can be anything. + +#include <string> + +class Message { + // Dummy method definitions to illustrate a possible generic message API. + std::string serialize() { return "TODO"; } + Message* deserialize() { + return nullptr; // TODO. + } +}; + +// This class could have been generated from a description like this. Assume +// fields are always optional, for simplicity (e.g. we don't care during +// serialization if a Customer has a name or not, we're just moving data around +// and validation happens elsewhere). 
+// +// message Customer { +// string name; +// int age; +// string address; +// } +class Customer : public Message { + private: + int _internal_bookkeeping_bits_; + + // Presence bits. They are true if the field has been set. + bool _has_name_ = false; + bool _has_age_ = false; + bool _has_address_ = false; + + // Actual field data. + std::string name_; + int age_; + std::string address_; + + public: + // Getters and setters. + bool has_name() { return _has_name_; } + bool has_age() { return _has_age_; } + bool has_address() { return _has_address_; } + + std::string name() { return name_; } + int age() { return age_; } + std::string address() { return address_; } + + void set_name(std::string name) { + name_ = name; + _has_name_ = true; + } + void set_age(int age) { + age_ = age; + _has_age_ = true; + } + void set_address(std::string address) { + address_ = address; + _has_address_ = true; + } +}; + +// message ProductOrder { +// string product_name; +// int amount; +// } +class ProductOrder : public Message { + private: + int _internal_bookkeeping_bits_; + + // Presence bits. They are true if the field has been set. + bool _has_product_name_ = false; + bool _has_amount_ = false; + + // Actual field data. + std::string product_name_; + int amount_; + + public: + // Getters and setters. + bool has_product_name() { return _has_product_name_; } + bool has_amount() { return _has_amount_; } + + std::string get_product_name() { return product_name_; } + int get_amount() { return amount_; } + + void set_product_name(std::string product_name) { + product_name_ = product_name; + _has_product_name_ = true; + } + void set_amount(int amount) { + amount_ = amount; + _has_amount_ = true; + } +}; + +int main(int argc, char **argv) { + Customer customer; + customer.set_name("C. Ustomer"); + customer.set_address("123 Fake St."); + // no age, so we can check absent fields get omitted. 
+ + ProductOrder order; + order.set_product_name("widget"); + order.set_amount(100); + return 0; // break here. +} + diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -49,13 +49,15 @@ public: TypeSummaryImpl::Flags m_flags; StringList m_target_types; - bool m_regex; + FormatterMatchType m_match_type; ConstString m_name; std::string m_category; - ScriptAddOptions(const TypeSummaryImpl::Flags &flags, bool regx, - ConstString name, std::string catg) - : m_flags(flags), m_regex(regx), m_name(name), m_category(catg) {} + ScriptAddOptions(const TypeSummaryImpl::Flags &flags, + FormatterMatchType match_type, ConstString name, + std::string catg) + : m_flags(flags), m_match_type(match_type), m_name(name), + m_category(catg) {} typedef std::shared_ptr SharedPointer; }; @@ -65,13 +67,14 @@ bool m_skip_pointers; bool m_skip_references; bool m_cascade; - bool m_regex; + FormatterMatchType m_match_type; StringList m_target_types; std::string m_category; - SynthAddOptions(bool sptr, bool sref, bool casc, bool regx, std::string catg) + SynthAddOptions(bool sptr, bool sref, bool casc, + FormatterMatchType match_type, std::string catg) : m_skip_pointers(sptr), m_skip_references(sref), m_cascade(casc), - m_regex(regx), m_category(catg) {} + m_match_type(match_type), m_category(catg) {} typedef std::shared_ptr SharedPointer; }; @@ -121,7 +124,7 @@ // Instance variables to hold the values for command options. 
TypeSummaryImpl::Flags m_flags; - bool m_regex = false; + FormatterMatchType m_match_type = eFormatterMatchExact; std::string m_format_string; ConstString m_name; std::string m_python_script; @@ -139,8 +142,6 @@ bool Execute_StringSummary(Args &command, CommandReturnObject &result); public: - enum SummaryFormatType { eRegularSummary, eRegexSummary, eNamedSummary }; - CommandObjectTypeSummaryAdd(CommandInterpreter &interpreter); ~CommandObjectTypeSummaryAdd() override = default; @@ -198,12 +199,9 @@ Status error; for (const std::string &type_name : options->m_target_types) { - CommandObjectTypeSummaryAdd::AddSummary( - ConstString(type_name), script_format, - (options->m_regex - ? CommandObjectTypeSummaryAdd::eRegexSummary - : CommandObjectTypeSummaryAdd::eRegularSummary), - options->m_category, &error); + AddSummary(ConstString(type_name), script_format, + options->m_match_type, options->m_category, + &error); if (error.Fail()) { error_sp->Printf("error: %s", error.AsCString()); error_sp->Flush(); @@ -211,15 +209,11 @@ } if (options->m_name) { - CommandObjectTypeSummaryAdd::AddSummary( - options->m_name, script_format, - CommandObjectTypeSummaryAdd::eNamedSummary, - options->m_category, &error); + CommandObjectTypeSummaryAdd::AddNamedSummary( + options->m_name, script_format, &error); if (error.Fail()) { - CommandObjectTypeSummaryAdd::AddSummary( - options->m_name, script_format, - CommandObjectTypeSummaryAdd::eNamedSummary, - options->m_category, &error); + CommandObjectTypeSummaryAdd::AddNamedSummary( + options->m_name, script_format, &error); if (error.Fail()) { error_sp->Printf("error: %s", error.AsCString()); error_sp->Flush(); @@ -261,9 +255,12 @@ io_handler.SetIsDone(true); } - static bool AddSummary(ConstString type_name, lldb::TypeSummaryImplSP entry, - SummaryFormatType type, std::string category, - Status *error = nullptr); + bool AddSummary(ConstString type_name, lldb::TypeSummaryImplSP entry, + FormatterMatchType match_type, std::string category, + 
Status *error = nullptr); + + bool AddNamedSummary(ConstString summary_name, lldb::TypeSummaryImplSP entry, + Status *error = nullptr); protected: bool DoExecute(Args &command, CommandReturnObject &result) override; @@ -322,7 +319,18 @@ m_category = std::string(option_arg); break; case 'x': - m_regex = true; + if (m_match_type == eFormatterMatchCallback) + error.SetErrorString( + "can't use --regex and --recognizer-function at the same time"); + else + m_match_type = eFormatterMatchRegex; + break; + case '\x01': + if (m_match_type == eFormatterMatchRegex) + error.SetErrorString( + "can't use --regex and --recognizer-function at the same time"); + else + m_match_type = eFormatterMatchCallback; break; default: llvm_unreachable("Unimplemented option"); @@ -339,7 +347,7 @@ m_category = "default"; is_class_based = false; handwrite_python = false; - m_regex = false; + m_match_type = eFormatterMatchExact; } llvm::ArrayRef GetDefinitions() override { @@ -356,7 +364,7 @@ std::string m_category; bool is_class_based; bool handwrite_python; - bool m_regex; + FormatterMatchType m_match_type; }; CommandOptions m_options; @@ -436,12 +444,9 @@ for (const std::string &type_name : options->m_target_types) { if (!type_name.empty()) { - if (!CommandObjectTypeSynthAdd::AddSynth( - ConstString(type_name), synth_provider, - options->m_regex - ? 
CommandObjectTypeSynthAdd::eRegexSynth - : CommandObjectTypeSynthAdd::eRegularSynth, - options->m_category, &error)) { + if (!AddSynth(ConstString(type_name), synth_provider, + options->m_match_type, options->m_category, + &error)) { error_sp->Printf("error: %s\n", error.AsCString()); error_sp->Flush(); break; @@ -480,15 +485,13 @@ } public: - enum SynthFormatType { eRegularSynth, eRegexSynth }; - CommandObjectTypeSynthAdd(CommandInterpreter &interpreter); ~CommandObjectTypeSynthAdd() override = default; - static bool AddSynth(ConstString type_name, lldb::SyntheticChildrenSP entry, - SynthFormatType type, std::string category_name, - Status *error); + bool AddSynth(ConstString type_name, lldb::SyntheticChildrenSP entry, + FormatterMatchType match_type, std::string category_name, + Status *error); }; // CommandObjectTypeFormatAdd @@ -1167,7 +1170,18 @@ m_flags.SetSkipReferences(true); break; case 'x': - m_regex = true; + if (m_match_type == eFormatterMatchCallback) + error.SetErrorString( + "can't use --regex and --recognizer-function at the same time"); + else + m_match_type = eFormatterMatchRegex; + break; + case '\x01': + if (m_match_type == eFormatterMatchRegex) + error.SetErrorString( + "can't use --regex and --recognizer-function at the same time"); + else + m_match_type = eFormatterMatchCallback; break; case 'n': m_name.SetString(option_arg); @@ -1204,7 +1218,7 @@ .SetSkipReferences(false) .SetHideItemNames(false); - m_regex = false; + m_match_type = eFormatterMatchExact; m_name.Clear(); m_python_script = ""; m_python_function = ""; @@ -1278,7 +1292,7 @@ } else { // Use an IOHandler to grab Python code from the user auto options = std::make_unique<ScriptAddOptions>( - m_options.m_flags, m_options.m_regex, m_options.m_name, + m_options.m_flags, m_options.m_match_type, m_options.m_name, m_options.m_category); for (auto &entry : command.entries()) { @@ -1306,10 +1320,8 @@ Status error; for (auto &entry : command.entries()) { - CommandObjectTypeSummaryAdd::AddSummary( - 
ConstString(entry.ref()), script_format, - (m_options.m_regex ? eRegexSummary : eRegularSummary), - m_options.m_category, &error); + AddSummary(ConstString(entry.ref()), script_format, m_options.m_match_type, + m_options.m_category, &error); if (error.Fail()) { result.AppendError(error.AsCString()); return false; @@ -1317,8 +1329,7 @@ } if (m_options.m_name) { - AddSummary(m_options.m_name, script_format, eNamedSummary, - m_options.m_category, &error); + AddNamedSummary(m_options.m_name, script_format, &error); if (error.Fail()) { result.AppendError(error.AsCString()); result.AppendError("added to types, but not given a name"); @@ -1379,9 +1390,8 @@ } ConstString typeCS(arg_entry.ref()); - AddSummary(typeCS, entry, - (m_options.m_regex ? eRegexSummary : eRegularSummary), - m_options.m_category, &error); + AddSummary(typeCS, entry, m_options.m_match_type, m_options.m_category, + &error); if (error.Fail()) { result.AppendError(error.AsCString()); @@ -1390,8 +1400,7 @@ } if (m_options.m_name) { - AddSummary(m_options.m_name, entry, eNamedSummary, m_options.m_category, - &error); + AddNamedSummary(m_options.m_name, entry, &error); if (error.Fail()) { result.AppendError(error.AsCString()); result.AppendError("added to types, but not given a name"); @@ -1546,31 +1555,29 @@ return false; } +bool CommandObjectTypeSummaryAdd::AddNamedSummary(ConstString summary_name, + TypeSummaryImplSP entry, + Status *error) { + // system named summaries do not exist (yet?) + DataVisualization::NamedSummaryFormats::Add(summary_name, entry); + return true; +} + bool CommandObjectTypeSummaryAdd::AddSummary(ConstString type_name, TypeSummaryImplSP entry, - SummaryFormatType type, + FormatterMatchType match_type, std::string category_name, Status *error) { - - // Named summaries are a special case, they exist in their own map in the - // FormatManager, outside of any categories. - if (type == eNamedSummary) { - // system named summaries do not exist (yet?) 
- DataVisualization::NamedSummaryFormats::Add(type_name, entry); - return true; - } - lldb::TypeCategoryImplSP category; DataVisualization::Categories::GetCategory(ConstString(category_name.c_str()), category); - if (type == eRegularSummary) { + if (match_type == eFormatterMatchExact) { if (FixArrayTypeNameWithRegex(type_name)) - type = eRegexSummary; + match_type = eFormatterMatchRegex; } - FormatterMatchType match_type = eFormatterMatchExact; - if (type == eRegexSummary) { + if (match_type == eFormatterMatchRegex) { match_type = eFormatterMatchRegex; RegularExpression typeRX(type_name.GetStringRef()); if (!typeRX.IsValid()) { @@ -1580,6 +1587,18 @@ return false; } } + + if (match_type == eFormatterMatchCallback) { + ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); + if (interpreter && !interpreter->CheckObjectExists(type_name.AsCString())) { + if (error) + error->SetErrorStringWithFormat( + "The provided recognizer function \"%s\" does not exist - " + "please define it before attempting to use this summary.\n", + type_name.AsCString()); + return false; + } + } category->AddTypeSummary(type_name.GetStringRef(), match_type, entry); return true; } @@ -2187,7 +2206,7 @@ Args &command, CommandReturnObject &result) { auto options = std::make_unique<SynthAddOptions>( m_options.m_skip_pointers, m_options.m_skip_references, - m_options.m_cascade, m_options.m_regex, m_options.m_category); + m_options.m_cascade, m_options.m_match_type, m_options.m_category); for (auto &entry : command.entries()) { if (entry.ref().empty()) { @@ -2257,9 +2276,8 @@ } ConstString typeCS(arg_entry.ref()); - if (!AddSynth(typeCS, entry, - m_options.m_regex ? 
eRegexSynth : eRegularSynth, - m_options.m_category, &error)) { + if (!AddSynth(typeCS, entry, m_options.m_match_type, m_options.m_category, + &error)) { result.AppendError(error.AsCString()); return false; } @@ -2287,22 +2305,22 @@ bool CommandObjectTypeSynthAdd::AddSynth(ConstString type_name, SyntheticChildrenSP entry, - SynthFormatType type, + FormatterMatchType match_type, std::string category_name, Status *error) { lldb::TypeCategoryImplSP category; DataVisualization::Categories::GetCategory(ConstString(category_name.c_str()), category); - if (type == eRegularSynth) { + if (match_type == eFormatterMatchExact) { if (FixArrayTypeNameWithRegex(type_name)) - type = eRegexSynth; + match_type = eFormatterMatchRegex; } // Only check for conflicting filters in the same category if `type_name` is // an actual type name. Matching a regex string against registered regexes // doesn't work. - if (type == eRegularSynth) { + if (match_type == eFormatterMatchExact) { // It's not generally possible to get a type object here. For example, this // command can be run before loading any binaries. Do just a best-effort // name-based lookup here to try to prevent conflicts. 
@@ -2318,9 +2336,7 @@ } } - FormatterMatchType match_type = eFormatterMatchExact; - if (type == eRegexSynth) { - match_type = eFormatterMatchRegex; + if (match_type == eFormatterMatchRegex) { RegularExpression typeRX(type_name.GetStringRef()); if (!typeRX.IsValid()) { if (error) @@ -2330,6 +2346,18 @@ } } + if (match_type == eFormatterMatchCallback) { + ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); + if (interpreter && !interpreter->CheckObjectExists(type_name.AsCString())) { + if (error) + error->SetErrorStringWithFormat( + "The provided recognizer function \"%s\" does not exist - please " + "define it before attempting to use this synthetic provider.\n", + type_name.AsCString()); + return false; + } + } + category->AddTypeSynthetic(type_name.GetStringRef(), match_type, entry); return true; } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1189,6 +1189,11 @@ Desc<"Don't use this format for references-to-type objects.">; def type_summary_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">; + def type_summary_add_recognizer_function : + Option<"recognizer-function", "\\x01">, + Desc<"The names in the argument list are actually the names of python " + "functions that decide whether to use this summary for any given type. 
" + "Cannot be specified at the same time as --regex (-x).">; def type_summary_add_inline_children : Option<"inline-children", "c">, Group<1>, Required, Desc<"If true, inline all child values into summary string.">; @@ -1230,6 +1235,11 @@ "children.">; def type_synth_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">; + def type_synth_add_recognizer_function : + Option<"recognizer-function", "\\x01">, + Desc<"The names in the argument list are actually the names of python " + "functions that decide whether to use this synthetic provider for any " + "given type. Cannot be specified at the same time as --regex (-x).">; } let Command = "type format add" in { diff --git a/lldb/test/API/functionalities/data-formatter/callback-matching/TestDataFormatterCallbackMatching.py b/lldb/test/API/functionalities/data-formatter/callback-matching/TestDataFormatterCallbackMatching.py --- a/lldb/test/API/functionalities/data-formatter/callback-matching/TestDataFormatterCallbackMatching.py +++ b/lldb/test/API/functionalities/data-formatter/callback-matching/TestDataFormatterCallbackMatching.py @@ -16,7 +16,7 @@ # Find the line number to break at. self.line = line_number('main.cpp', '// Set break point at this line.') - def test_callback_matchers(self): + def test_callback_matchers_api_registration(self): """Test data formatter commands.""" self.build() @@ -31,6 +31,7 @@ # now set up a summary function that uses a python callback to match # classes that derive from `Base`. self.runCmd("command script import --allow-reload ./formatters_with_callback.py") + self.runCmd("script formatters_with_callback.register_formatters(lldb.debugger)") # Now `derived` should use our callback summary + synthetic children. 
self.expect("frame variable derived", @@ -47,3 +48,40 @@ substrs=['hello from callback summary']) self.expect("frame variable nd", substrs=['z = 4444']) + + def test_callback_matchers_cli_registration(self): + """Test data formatter commands.""" + self.build() + + _, process, thread, _ = lldbutil.run_to_line_breakpoint( + self, lldb.SBFileSpec("main.cpp"), self.line) + + # Print derived without a formatter. + self.expect("frame variable derived", + substrs=['x = 2222', + 'y = 3333']) + + # now set up a summary function that uses a python callback to match + # classes that derive from `Base`. + self.runCmd("command script import --allow-reload ./formatters_with_callback.py") + self.runCmd("type summary add -e -s 'hello from callback summary' " + "--recognizer-function formatters_with_callback.derives_from_base") + self.runCmd("type synth add -l formatters_with_callback.SynthProvider " + "--recognizer-function formatters_with_callback.derives_from_base") + + # Now `derived` should use our callback summary + synthetic children. + self.expect("frame variable derived", + substrs=['hello from callback summary', + 'synthetic_child = 9999']) + + # But not other classes. 
+ self.expect("frame variable base", matching=False, + substrs=['hello from callback summary']) + self.expect("frame variable base", + substrs=['x = 1111']) + + self.expect("frame variable nd", matching=False, + substrs=['hello from callback summary']) + self.expect("frame variable nd", + substrs=['z = 4444']) + diff --git a/lldb/test/API/functionalities/data-formatter/callback-matching/formatters_with_callback.py b/lldb/test/API/functionalities/data-formatter/callback-matching/formatters_with_callback.py --- a/lldb/test/API/functionalities/data-formatter/callback-matching/formatters_with_callback.py +++ b/lldb/test/API/functionalities/data-formatter/callback-matching/formatters_with_callback.py @@ -24,7 +24,7 @@ return None -def __lldb_init_module(debugger, dict): +def register_formatters(debugger): cat = debugger.CreateCategory("callback_formatters") cat.AddTypeSummary( lldb.SBTypeNameSpecifier("formatters_with_callback.derives_from_base",