Index: docs/CMakeLists.txt =================================================================== --- docs/CMakeLists.txt +++ docs/CMakeLists.txt @@ -1,4 +1,3 @@ - include(FindDoxygen) if(DOXYGEN_FOUND) @@ -39,3 +38,12 @@ COMMENT "Generating LLDB Python API reference with epydoc" VERBATIM ) endif(EPYDOC_EXECUTABLE) + +if (LLVM_ENABLE_SPHINX) + include(AddSphinxTarget) + if (SPHINX_FOUND) + if (${SPHINX_OUTPUT_HTML}) + add_sphinx_target(html lldb) + endif() + endif() +endif() Index: docs/conf.py =================================================================== --- /dev/null +++ docs/conf.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- +# +# LLDB documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 9 20:01:55 2012. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os +from datetime import date + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax', 'sphinx.ext.intersphinx'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. 
+#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'LLDB' +copyright = u'2007-%d, The LLDB Team' % date.today().year + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short version. +version = '8' +# The full version, including alpha/beta/rc tags. +release = '8' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', 'analyzer'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. 
+#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'LLDBdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'LLDB.tex', u'LLDB Documentation', + u'The LLDB Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'LLDB', u'LLDB Documentation', + u'The LLDB Team', 'LLDB', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' Index: docs/index.rst =================================================================== --- /dev/null +++ docs/index.rst @@ -0,0 +1,45 @@ +.. title:: Welcome to LLDB's documentation! + +Goals & Status +============== + +.. toctree:: + :maxdepth: 1 + + status/about + status/goals + status/features + status/status + status/projects + +Use & Extension +=============== + +.. toctree:: + :maxdepth: 1 + + use/tutorial + use/map + use/formatting + use/variable + use/symbolication + use/symbols + use/python + use/remote + use/troubleshooting + use/architecture + +Resources +========= + +.. toctree:: + :maxdepth: 1 + + resources/test + resources/sbapi + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` Index: docs/resources/sbapi.rst =================================================================== --- /dev/null +++ docs/resources/sbapi.rst @@ -0,0 +1,55 @@ +The SB API Coding Rules +======================= + +The SB APIs constitute the stable C++ API that lldb presents to external +clients, and which get processed by SWIG to produce the Python bindings to +lldb. As such it is important that they not suffer from the binary +incompatibilities that C++ is so susceptible to. 
We've established a few rules +to ensure that this happens. + +The classes in the SB API's are all called SB<SomeName>, where SomeName is in +CamelCase starting with an upper case letter. The method names are all +CamelCase with initial capital letter as well. + +All the SB API classes are non-virtual, single inheritance classes. They should +only include SBDefines.h or other SB headers as needed. There should be no +inlined method implementations in the header files; they should all be in the +implementation files. And there should be no direct ivar access. + +You also need to choose the ivars for the class with care, since you can't add +or remove ivars without breaking binary compatibility. In some cases, the SB +class is a thin wrapper around an internal lldb_private object. In that case, +the class can have a single ivar, which is either a pointer, shared_ptr or +unique_ptr to the object in the lldb_private API. All the lldb_private classes +that get used this way are declared as opaque classes in lldb_forward.h, which +is included in SBDefines.h. So if you need an SB class to wrap an lldb_private +class that isn't in lldb_forward.h, add it there rather than making a direct +opaque declaration in the SB classes .h file. + +If the SB Class needs some state of its own, as well as the backing object, +don't include that as a direct ivar in the SB Class. Instead, make an Impl +class in the SB's .cpp file, and then make the SB object hold a shared or +unique pointer to the Impl object. The theory behind this is that if you need +more state in the SB object, those needs are likely to change over time, and +this way the Impl class can pick up members without changing the size of the +object. An example of this is the SBValue class. Please note that you should +not put this Impl class in the lldb namespace. Failure to do so leads to +leakage of weak-linked symbols in the SBAPI. 
+ +In order to fit into the Python API's, we need to be able to default construct +all the SB objects. Since the ivars of the classes are all pointers of one sort +or other, this can easily be done, but it means all the methods must be +prepared to handle their opaque implementation pointer being empty, and do +something reasonable. We also always have an "IsValid" method on all the SB +classes to report whether the object is empty or not. + +Another piece of the SB API infrastructure is the Python (or other script +interpreter) customization. SWIG allows you to add property access, iterators +and documentation to classes, but to do that you have to use a SWIG interface +file in place of the .h file. Those files have a different format than a +straight C++ header file. These files are called SB<ClassName>.i, and live in +"scripts/interface". They are constructed by starting with the associated .h +file, and adding documentation and the Python decorations, etc. We do this in a +decidedly low-tech way, by maintaining the two files in parallel. That +simplifies the build process, but it does mean that if you add a method to the +C++ API's for an SB class, you have to copy the interface to the .i file. Index: docs/resources/test.rst =================================================================== --- /dev/null +++ docs/resources/test.rst @@ -0,0 +1,164 @@ +Testing LLDB +============ + +The LLDB test suite consists of Python scripts located under the test +directory. Each script contains a number of test cases and is usually +accompanied by a C (C++, ObjC, etc.) source file. Each test first compiles the +source file and then uses LLDB to debug the resulting executable. The tests +verify both the LLDB command line interface and the scripting API. + +.. 
contents:: + :local: + +Running the Full Test Suite +--------------------------- + +**Windows Note**: In the examples that follow, any invocations of python should +be replaced with python_d, the debug interpreter, when running the test suite +against a debug version of LLDB. + +The easiest way to run the LLDB test suite is to use the ``check-lldb`` build +target. By default, the ``check-lldb`` target builds the test programs with the +same compiler that was used to build LLDB. To build the tests with a different +compiler, you can set the ``LLDB_TEST_C_COMPILER`` or the ``LLDB_TEST_CXX_COMPILER`` +CMake variables. These variables are ignored unless the respective +``LLDB_TEST_USE_CUSTOM_C_COMPILER`` and ``LLDB_TEST_USE_CUSTOM_CXX_COMPILER`` are set +to ``ON``. + +It is possible to customize the architecture of the test binaries and compiler +used by appending ``-A`` and ``-C`` options respectively to the CMake variable +``LLDB_TEST_USER_ARGS``. For example, to test LLDB against 32-bit binaries +built with a custom version of clang, do: + +:: + + > cmake -DLLDB_TEST_USER_ARGS="-A i386 -C /path/to/custom/clang" -G Ninja + > ninja check-lldb + +Note that multiple ``-A`` and ``-C`` flags can be specified to +``LLDB_TEST_USER_ARGS``. + +Note that on NetBSD you must export ``LD_LIBRARY_PATH=$PWD/lib`` in your +environment. This is due to lack of the ``$ORIGIN`` linker feature. + +Running a Specific Test or Set of Tests +--------------------------------------- + +In addition to running all the LLDB test suites with the "check-lldb" CMake target above, it is possible to run individual LLDB tests. For example, to run the test cases defined in TestInferiorCrashing.py, run: + +:: + + > cd $lldb/test + > python dotest.py --executable -p TestInferiorCrashing.py ../packages/Python/lldbsuite/test + +If the test is not specified by name (e.g. 
if you leave the -p argument off), LLDB will run all tests in that directory: + +:: + + > python dotest.py --executable <path-to-lldb> functionalities/data-formatter + +Many more options are available. To see a list of all of them, run: + +:: + + > python dotest.py -h + +The ``dotest.py`` script runs tests in parallel by default. To disable the parallel +test running feature, use the ``--no-multiprocess`` flag. The number of concurrent +tests is controlled by the ``LLDB_TEST_THREADS`` environment variable or the +``--threads`` command line parameter. The default value is the number of CPU cores +on your system. + +The parallel test running feature will handle an additional ``--test-subdir +SUBDIR`` arg. When specified, ``SUBDIR`` is relative to the root test directory +and will limit all parallel test running to that subdirectory's tree of tests. + +The parallel test runner will run all tests within a given directory serially, +but will run multiple directories concurrently. Thus, for test writers, we +provide serialized test run semantics within a directory. Note child +directories are considered entirely separate, so two child directories could be +running in parallel with a parent directory. + +Running the Test Suite Remotely +------------------------------- + +Running the test-suite remotely is similar to the process of running a local +test suite, but there are two things to have in mind: + +1. You must have the lldb-server running on the remote system, ready to accept + multiple connections. For more information on how to set up remote debugging + see the Remote debugging page. +2. You must tell the test-suite how to connect to the remote system. This is + achieved using the ``--platform-name``, ``--platform-url`` and + ``--platform-working-dir`` parameters to ``dotest.py``. These parameters + correspond to the platform select and platform connect LLDB commands. You + will usually also need to specify the compiler and architecture for the + remote system. 
+ +Currently, running the remote test suite is supported only with ``dotest.py`` (or +dosep.py with a single thread), but we expect this issue to be addressed in the +near future. + +Debugging Test Failures +----------------------- + +On non-Windows platforms, you can use the ``-d`` option to ``dotest.py`` which +will cause the script to wait for a while until a debugger is attached. + +Debugging Test Failures on Windows +---------------------------------- + +On Windows, it is strongly recommended to use Python Tools for Visual Studio +for debugging test failures. It can seamlessly step between native and managed +code, which is very helpful when you need to step through the test itself, and +then into the LLDB code that backs the operations the test is performing. + +A quick guide to getting started with PTVS is as follows: + +#. Install PTVS +#. Create a Visual Studio Project for the Python code. + #. Go to File -> New -> Project -> Python -> From Existing Python Code. + #. Choose llvm/tools/lldb as the directory containing the Python code. + #. When asked where to save the .pyproj file, choose the folder ``llvm/tools/lldb/pyproj``. This is a special folder that is ignored by the ``.gitignore`` file, since it is not checked in. +#. Set test/dotest.py as the startup file +#. Make sure there is a Python Environment installed for your distribution. For example, if you installed Python to ``C:\Python35``, PTVS needs to know that this is the interpreter you want to use for running the test suite. + #. Go to Tools -> Options -> Python Tools -> Environment Options + #. Click Add Environment, and enter Python 3.5 Debug for the name. Fill out the values correctly. +#. Configure the project to use this debug interpreter. + #. Right click the Project node in Solution Explorer. + #. In the General tab, Make sure Python 3.5 Debug is the selected Interpreter. + #. In Debug/Search Paths, enter the path to your ninja/lib/site-packages directory. + #. 
In Debug/Environment Variables, enter ``VCINSTALLDIR=C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\``. + #. If you want to enable mixed mode debugging, check Enable native code debugging (this slows down debugging, so enable it only on an as-needed basis.) +#. Set the command line for the test suite to run. + #. Right click the project in solution explorer and choose the Debug tab. + #. Enter the arguments to dotest.py. Note you must add ``--no-multiprocess``. + #. Example command options: + +:: + + # quiet mode + -q + --arch=i686 + # Path to debug lldb.exe + --executable D:/src/llvmbuild/ninja/bin/lldb.exe + # Directory to store log files + -s D:/src/llvmbuild/ninja/lldb-test-traces + -u CXXFLAGS -u CFLAGS + # If a test crashes, show JIT debugging dialog. + --enable-crash-dialog + # Path to release clang.exe + -C d:\src\llvmbuild\ninja_release\bin\clang.exe + # Path to the particular test you want to debug. + -p TestPaths.py + # Root of test tree + D:\src\llvm\tools\lldb\packages\Python\lldbsuite\test + # Required in order to be able to debug the test. + --no-multiprocess + +:: + + -q --arch=i686 --executable D:/src/llvmbuild/ninja/bin/lldb.exe -s D:/src/llvmbuild/ninja/lldb-test-traces -u CXXFLAGS -u CFLAGS --enable-crash-dialog -C d:\src\llvmbuild\ninja_release\bin\clang.exe -p TestPaths.py D:\src\llvm\tools\lldb\packages\Python\lldbsuite\test --no-multiprocess + + + Index: docs/status/about.rst =================================================================== --- /dev/null +++ docs/status/about.rst @@ -0,0 +1,98 @@ +About +===== + +LLDB is a next generation, high-performance debugger. It is built as a set of +reusable components which highly leverage existing libraries in the larger LLVM +Project, such as the Clang expression parser and LLVM disassembler. + +LLDB is the default debugger in Xcode on Mac OS X and supports debugging C, +Objective-C and C++ on the desktop and iOS devices and simulator. 
+ +All of the code in the LLDB project is available under the standard `LLVM +License <http://llvm.org/docs/DeveloperPolicy.html#license>`__, an open source +"BSD-style" license. + +Why a New Debugger? +------------------- + +In order to achieve our goals we decided to start with a fresh architecture +that would support modern multi-threaded programs, handle debugging symbols in +an efficient manner, use compiler based code knowledge and have plug-in support +for functionality and extensions. Additionally we want the debugger +capabilities to be available to other analysis tools, be they scripts or +compiled programs, without requiring them to be GPL. + +Compiler Integration Benefits +----------------------------- + +LLDB currently converts debug information into clang types so that it can +leverage the clang compiler infrastructure. This allows LLDB to support the +latest C, C++, Objective-C and Objective-C++ language features and runtimes in +expressions without having to reimplement any of this functionality. It also +leverages the compiler to take care of all ABI details when making function +calls for expressions, when disassembling instructions and extracting +instruction details, and much more. + +The major benefits include: + +- Up to date language support for C, C++, Objective-C +- Multi-line expressions that can declare local variables and types +- Utilize the JIT for expressions when supported +- Evaluate expression Intermediate Representation (IR) when JIT can't be used + +Reusability +----------- + +The LLDB debugger APIs are exposed as a C++ object oriented interface in a +shared library. The lldb command line tool links to, and uses this public API. +On Mac OS X the shared library is exposed as a framework named LLDB.framework, +and unix systems expose it as lldb.so. 
The entire API is also then exposed +through Python script bindings which allow the API to be used within the LLDB +embedded script interpreter, and also in any python script that loads the +lldb.py module in standard python script files. See the Python Reference page +for more details on how and where Python can be used with the LLDB API. + +Sharing the LLDB API allows LLDB to not only be used for debugging, but also +for symbolication, disassembly, object and symbol file introspection, and much +more. + +Platform Support +---------------- + +LLDB is known to work on the following platforms, but ports to new platforms +are welcome: + +* Mac OS X desktop user space debugging for i386 and x86-64 +* iOS simulator debugging on i386 +* iOS device debugging on ARM +* Linux local user-space debugging for i386, x86-64 and PPC64le +* FreeBSD local user-space debugging for i386 and x86-64 +* Windows local user-space debugging for i386 (*) + +(*) Support for Windows is under active development. Basic functionality is +expected to work, with functionality improving rapidly. + +Get Involved +------------ + +To check out the code, use: + +svn co http://llvm.org/svn/llvm-project/lldb/trunk lldb + +Note that LLDB generally builds from top-of-trunk: + +* On macOS with Xcode +* On Linux and FreeBSD (with clang and libstdc++/libc++) +* On NetBSD (with GCC and clang and libstdc++/libc++) +* On Windows with VS 2012 or higher using CMake + +See the LLDB Build Page for platform-specific build instructions. + +Discussions about LLDB should go to the `lldb-dev +<http://lists.llvm.org/mailman/listinfo/lldb-dev>`__ mailing list. Commit +messages for the lldb SVN module are automatically sent to the `lldb-commits +<http://lists.llvm.org/mailman/listinfo/lldb-commits>`__ mailing list, and +this is also the preferred mailing list for patch submissions. + +See the Projects page if you are looking for some interesting areas to +contribute to lldb. 
Index: docs/status/features.rst =================================================================== --- /dev/null +++ docs/status/features.rst @@ -0,0 +1,18 @@ +Features +======== + +LLDB supports a broad variety of basic debugging features such as reading DWARF, supporting step, next, finish, backtraces, etc. Some more interesting bits are: + +* Plug-in architecture for portability and extensibility: + + * Object file parsers for executable file formats. Support currently includes Mach-O (32 and 64-bit) & ELF (32-bit). + * Object container parsers to extract object files contained within a file. Support currently includes universal Mach-O files & BSD Archives. + * Debug symbol file parsers to incrementally extract debug information from object files. Support currently includes DWARF & Mach-O symbol tables. + * Symbol vendor plug-ins collect data from a variety of different sources for an executable object. + * Disassembly plug-ins for each architecture. Support currently includes an LLVM disassembler for i386, x86-64, ARM/Thumb, and PPC64le. + * Debugger plug-ins implement the host and target specific functions required to debug. + +* SWIG-generated script bridging allows Python to access and control the public API of the debugger library. +* A remote protocol server, debugserver, implements Mac OS X debugging on i386 and x86-64. +* A command line debugger - the lldb executable itself. +* A framework API to the library. Index: docs/status/goals.rst =================================================================== --- /dev/null +++ docs/status/goals.rst @@ -0,0 +1,33 @@ +Goals +===== + +The current state of the art in open source debuggers is that they work in the +common cases for C applications, but don't handle many "hard cases" properly. +For example, C++ expression parsing, handling overloading, templates, +multi-threading, and other non-trivial scenarios all work in some base cases, +but don't work reliably. 
+ +The goal of LLDB is to provide an amazing debugging experience that "just +works". We aim to solve these long-standing problems where debuggers get +confused, so that you can think about debugging your problem, not about +deficiencies in the debugger. + +With a long view, there is no good reason for a debugger to reinvent its own +C/C++ parser, type system, know all the target calling convention details, +implement its own disassembler, etc. By using the existing libraries vended by +the LLVM project, we believe that many of these problems will be defined away, +and the debugger can focus on important issues like process control, efficient +symbol reading and indexing, thread management, and other debugger-specific +problems. + +Some more specific goals include: + +* Build libraries for inclusion in IDEs, command line tools, and other analysis + tools +* High performance and efficient memory use +* Extensible: Python scriptable and use a plug-in architecture +* Reuse existing compiler technology where it makes sense +* Excellent multi-threaded debugging support +* Great support for C, Objective-C and C++ +* Retargetable to support multiple platforms +* Provide a base for debugger research and other innovation Index: docs/status/projects.rst =================================================================== --- /dev/null +++ docs/status/projects.rst @@ -0,0 +1,396 @@ +Projects +======== + +The following is a mostly unordered set of the ideas for improvements to the +LLDB debugger. Some are fairly deep, some would require less effort. + +.. contents:: + :local: + +Speed up type realization in lldb +--------------------------------- + +The type of problem I'm addressing here is the situation where you are +debugging a large program (lldb built with debug clang/swift will do) and you +go to print a simple expression, and lldb goes away for 30 seconds. When you +sample it, it is always busily churning through all the CU's in the world +looking for something. 
The problem isn't that looking for something in +particular is slow, but rather that we somehow turned a bounded search (maybe +a subtype of "std::string") into an unbounded search (all things with the name +of that subtype.) Or didn't stop when we got a reasonable answer proximate to +the context of the search, but let the search leak out globally. And quite +likely there are other issues that I haven't guessed yet. But if you end up +churning through 3 or 4 Gig of debug info, that's going to be slow no matter how +well written your debug reader is... + +My guess is the work will be more in the general symbol lookup than in the +DWARF parser in particular, but it may be a combination of both. + +As a user debugging a largish program, this is the most obvious lameness of +lldb. + +Symbol name completion in the expression parser +----------------------------------------------- + +This is the other obvious lameness of lldb. You can do: + +:: + + (lldb) frame var foo.b<TAB> + +and we will tell you it is "foo.bar". But you can't do that in the expression +parser. This will require collaboration with the clang/swift folks to get the +right extension points in the compiler. And whatever they are, lldb will need +to use them to tell the compiler about what names are available. It will be +important to avoid the pitfalls of #1 where we wander into the entire DWARF +world. + +Make a high speed asynchronous communication channel +---------------------------------------------------- + +All lldb debugging nowadays is done by talking to a debug agent. We used the +gdb-remote protocol because that is universal, and good enough, and you have to +support it anyway since so many little devices & JTAG's and VM's etc support +it. But it is really old, not terribly high performance, and can't really +handle sending or receiving messages while the process is supposedly running. 
+It should have compression built in, remove the hand-built checksums and rely +on the robust communication protocols we always have nowadays, allow for +out-of-order requests/replies, allow for reconnecting to a temporarily +disconnected debug session, regularize all of the packet formatting into JSON +or BSON or whatever while including a way to do large binary transfers. It must +be possible to come up with something faster, and better tunable for the many +communications pathways we end up supporting. + +Fix local variable lookup in the lldb expression parser +------------------------------------------------------- + +The injection of local variables into the clang expression parser is +currently done incorrectly - it happens too late in the lookup. This results +in namespace variables & functions, same named types and ivars shadowing +locals when it should be the other way around. An attempt was made to fix +this by manually inserting all the visible local variables into wrapper +function in the expression text. This mostly gets the job done but that +method means you have to realize all the types and locations of all local +variables for even the simplest of expressions, and when run on large +programs (e.g. lldb) it would cause unacceptable delays. And it was very +fragile since an error in realizing any of the locals would cause all +expressions run in that context to fail. We need to fix this by adjusting +the points where name lookup calls out to lldb in clang. + +Support calling SB & commands everywhere and support non-stop debugging +----------------------------------------------------------------------- + +There is a fairly ad-hoc system to handle when it is safe to run SB API's and +command line commands. This is actually a bit of a tricky problem, since we +allow access to the command line and SB API from some funky places in lldb. 
The +Operating System plugins are the most obvious instance, since they get run +right after lldb is told by debugserver that the process has stopped, but +before it has finished collating the information from the stop for presentation +to the higher levels. But breakpoint callbacks have some of the same problems, +and other things like the scripted stepping operations and any fancier +extension points we want to add to the debugger are going to be hard to +implement robustly till we work on a finer-grained and more explicit control +over who gets to control the process state. + +We also won't have any chance of supporting non-stop debugging - which is a +useful mode for programs that have a lot of high-priority or real-time worker +threads - until we get this sorted out. + +Finish the language abstraction and remove all the unnecessary API's +-------------------------------------------------------------------- + +An important part of making lldb a more useful "debugger toolkit" as opposed to +a C/C++/ObjC/Swift debugger is to have a clean abstraction for language +support. We did most, but not all, of the physical separation. We need to +finish that. And then by force of necessity the API's really look like the +interface to a C++ type system with a few swift bits added on. How you would +go about adding a new language is unclear and much more trouble than it is +worth at present. But if we made this nice, we could add a lot of value to +other language projects. + +Add some syntax to generate data formatters from type definitions +----------------------------------------------------------------- + +Uses of the data formatters fall into two types. There are data formatters for +types where the structure elements pretty much tell you how to present the +data, you just need a little expression language to express how to turn them +into what the user expects to see. 
Then there are the ones (like pretty much +all our Foundation/AppKit/UIKit formatters) that use deep magic to figure out +how the type is actually laid out. The latter are pretty much always going to +have to be done by hand. + +But for the ones where the information is expressed in the fields, it would be +great to have a way to express the instructions to produce summaries and +children in some form you could embed next to the types and have the compiler +produce a byte code form of the instructions and then make that available to +lldb along with the library. This isn't as simple as having clang run over the +headers and produce something from the types directly. After all, clang has no +way of knowing that the interesting thing about a std::vector is the elements +that you get by calling size (for the summary) and [] for the elements. But it +shouldn't be hard to come up with a generic markup to express this. + +Allow the expression parser to access dynamic type/data formatter information +----------------------------------------------------------------------------- + +This seems like a smaller one. The symptom is your object is Foo child of +Bar, and in the Locals view you see all the fields of Foo, but because the +static type of the object is Bar, you can't see any of the fields of Foo. +But if you could get this working, you could hijack the mechanism to make +the results of the value object summaries/synthetic children available to +expressions. And if you can do that, you could add other properties to an +object externally (through Python or some other extension point) and then +have these also available in the expression parser. You could use this to +express invariants for data structures, or other more advanced uses of types +in the debugger. + +Another version of this is to allow access to synthetic children in the +expression parser. 
Otherwise you end up in situations like: + +:: + + (lldb) print return_a_foo() + (SomeVectorLikeType) $1 = { + [0] = 0 + [1] = 1 + [2] = 2 + [3] = 3 + [4] = 4 + } + +That's good but: + +:: + + (lldb) print return_a_foo()[2] + +fails because the expression parser doesn't know anything about the +array-like nature of SomeVectorLikeType that it gets from the synthetic +children. + +Recover thread information lazily +--------------------------------- + +LLDB stores all the user intentions for a thread in the ThreadPlans stored in +the Thread class. That allows us to reliably implement a very natural model for +users moving through a debug session. For example, if step-over stops at a +breakpoint in a function in a younger region of the stack, continue will +complete the step-over rather than having to manually step out. But that means +that it is important that the Thread objects live as long as the Threads they +represent. For programs with many threads, but only one that you are debugging, +that makes stepping less efficient, since now you have to fetch the thread list +on every step or stepping doesn't work correctly. This is especially an issue +when the threads are provided by an Operating System plugin, where it may take +non-trivial work to reconstruct the thread list. It would be better to fetch +threads lazily but keep "unseen" threads in a holding area, and only retire +them when we know we've fetched the whole thread list and ensured they are no +longer alive. + +Add an extension point in the breakpoint search machinery +--------------------------------------------------------- + +This would allow highly customizable, algorithmic breakpoint types, like "break +on every use of some particular instruction, or instruction pattern, etc." + +Make Python-backed commands first class citizens +------------------------------------------------ + +As it stands, Python commands have no way to advertise their options. They are +required to parse their arguments by hand.
That leads to inconsistency, and +more importantly means they can't take advantage of auto-generated help and +command completion. This leaves python-backed commands feeling worse than +built-in ones. + +As part of this job, it would also be great to automatically hook the +"type" of an option value or argument (e.g. eArgTypeShlibName) to sensible +default completers. You need to be able to over-ride this in more complicated +scenarios (like in "break set" where the presence of a "-s" option limits the +search for completion of a "-n" option.) But in common cases it is unnecessary +busy-work to have to supply the completer AND the type. If this worked, then it +would be easier for Python commands to also get correct completers. + +Reimplement the command interpreter commands using the SB API +------------------------------------------------------------- + +Currently, all the CommandObject::DoExecute methods are implemented using the +lldb_private API's. That generally means that there's code that gets duplicated +between the CommandObject and the SB API that does roughly the same thing. We +would reduce this code duplication, present a single coherent face to the users +of lldb, and keep ourselves more honest about what we need in the SB API's if +we implemented the CommandObject::DoExecute methods using the SB API's. + +BTW, it is only this way because it was much easier to develop lldb if it had a +functioning command-line early on. So we did that first, and developed the SB +API's when lldb was more mature. There's no good technical reason to have the +commands use the lldb_private API's. + +Documentation and better examples +--------------------------------- + +We need to put the lldb syntax docs in the tutorial somewhere that is more +easily accessible. One suggestion is to add non-command based help to the help +system, and then have a "help lldb" or "help syntax" type command with this +info.
It would be nice if the non-command based help could be hierarchical so you could +make topics. + +There's a fair bit of docs about the SB API's, but it is spotty. Some classes +are well documented in the Python "help (lldb.SBWhatever)" and some are not. + +We need more conceptual docs. And we need more examples. And we could provide a +clean pluggable example for using LLDB standalone from Python. The +process_events.py is a start of this, but it just handles process events, and +it is really a quick sketch not a polished expandable proto-tool. + +Make a more accessible plugin architecture for lldb +--------------------------------------------------- + +Right now, you can only use the Python or SB API's to extend an extant lldb. +You can't implement any of the actual lldb Plugins as plugins. That means +anybody that wants to add new Object file/Process/Language etc support has to +build and distribute their own lldb. This is tricky because the API's the +plugins use are currently not stable (and recently have been changing quite a +lot.) We would have to define a subset of lldb_private that you could use, and +some way of telling whether the plugins were compatible with the lldb. But +long-term, making this sort of extension possible will make lldb more appealing +for research and 3rd party uses. + +Use instruction emulation to reduce the overhead for breakpoints +---------------------------------------------------------------- + +At present, breakpoints are implemented by inserting a trap instruction, then +when the trap is hit, replace the trap with the actual instruction and single +step. Then swap back and continue. This causes problems for read only text, and +also means that non-stop debugging must either stop all threads briefly to handle +this two-step or risk missing some breakpoint hits. If you emulated the +instruction and wrote back the results, you wouldn't have these problems, and +it would also save a stop per breakpoint hit.
Since we use breakpoints to +implement stepping, this savings could be significant on slow connections. + +Use the JIT to speed up conditional breakpoint evaluation +--------------------------------------------------------- + +We already JIT and cache the conditional expressions for breakpoints for the C +family of languages, so we aren't re-compiling every time you hit the +breakpoint. And if we couldn't IR interpret the expression, we leave the JIT'ed +code in place for reuse. But it would be even better if we could also insert +the "stop or not" decision into the code at the breakpoint, so you would only +actually stop the process when the condition was true. Greg's idea was that if +you had a conditional breakpoint set when you started the debug session, Xcode +could rebuild and insert enough no-ops that we could instrument the breakpoint +site and call the conditional expression, and only trap if the conditional was +true. + +Broaden the idea in "target stop-hook" to cover more events in the debugger +--------------------------------------------------------------------------- + +Shared library loads, command execution, User directed memory/register reads +and writes are all places where you would reasonably want to hook into the +debugger. + +Mock classes for testing +------------------------ + +We need "ProcessMock" and "ObjectFileMock" and the like. These would be real +plugin implementations for their underlying lldb classes, with the addition +that you can prime them from some sort of text based input files. For classes +that manage changes over time (like process) you would need to program the +state at StopPoint 0, StopPoint 1, etc. These could then be used for testing +reactions to complex threading problems & the like, and also for simulating +hard-to-test environments (like bare board debugging). + +A Bug-Trapper infrastructure +---------------------------- + +We very often have bugs that can't be reproduced locally. 
So having a +bug-report-trapper that can gather enough information from the surroundings of +a bug so that we can replay the session locally would be a big help tracking +down issues in this situation. This is tricky because you can't necessarily +require folks to leak information about their code in order to file bug +reports. So not only will you have to figure out what state to gather, you're +also going to have to anonymize it somehow. But we very often have bugs from +people that can't reduce the problem to a simple test case and can't give us +their code, and we often just can't help them as things stand now. Note that +adding the ProcessMock would be a good first stage towards this, since you +could make a ProcessMock creator/serializer from the current lldb state. + +Expression parser needs syntax for "{symbol,type} A in CU B.cpp" +---------------------------------------------------------------- + +Sometimes you need to specify non-visible or ambiguous types to the expression +parser. We were planning to do $b_dot_cpp$A or something like that. You might want +to specify a static in a function, in a source file, or in a shared library. So +the syntax should support all these. + +Add a "testButDontAbort" style test to the UnitTest framework +------------------------------------------------------------- + +The way we use unittest now (maybe this is the only way it can work, I don't +know) you can't report a real failure and continue with the test. That is +appropriate in some cases: if I'm supposed to hit breakpoint A before I +evaluate an expression, and don't hit breakpoint A, the test should fail. But +it means that if I want to test five different expressions, I can either do it +in one test, which is good because it means I only have to fire up one process, +attach to it, and get it to a certain point. But it also means if the first +test fails, the other four don't even get run.
So though at first we wrote a +bunch of tests like this, as time went on we switched more to writing "one at a +time" tests because they were more robust against a single failure. That makes +the test suite run much more slowly. It would be great to add a +"test_but_dont_abort" variant of the tests, then we could gang tests that all +drive to the same place and do similar things. As an added benefit, it would +allow us to be more thorough in writing tests, since each test would have lower +costs. + +Convert the dotest style tests to use lldbutil.run_to_source_breakpoint +----------------------------------------------------------------------- + +run_to_source_breakpoint & run_to_name_breakpoint provide a compact API that +does in one line what the first 10 or 20 lines of most of the old tests now do +by hand. Using these functions makes tests much more readable, and by +centralizing common functionality will make maintaining the testsuites easier +in the future. This is more of a finger exercise, and perhaps best implemented +by a rule like: "If you touch a test case, and it isn't using +run_to_source_breakpoint, please make it do so". + +Unify Watchpoint's & Breakpoints +-------------------------------- + +Option handling isn't shared, and more importantly the PerformAction's have a +lot of duplicated common code, most of which works less well on the Watchpoint +side. + +Reverse debugging +----------------- + +This is kind of a holy grail, it's hard to support for complex apps (many +threads, shared memory, etc.) But it would be SO nice to have... + +Non-stop debugging +------------------ + +By this I mean allowing some threads in the target program to run while +stopping other threads. This is supported in name in lldb at present, but lldb +makes the assumption "If I get a stop, I won't get another stop unless I +actually run the program." in a bunch of places so getting it to work reliably +will be a good bit of work.
And figuring out how to present this in the UI +will also be tricky. + +Fix and continue +---------------- + +We did this in gdb without a real JIT. The implementation shouldn't be that +hard, especially if you can build the executable for fix and continue. The +tricky part is how to verify that the user can only do the kinds of fixes that +are safe to do. No changing object sizes is easy to detect, but there were many +more subtle changes (function you are fixing is on the stack...) that take more +work to prevent. And then you have to explain these conditions to the user in some +helpful way. + +Unified IR interpreter +---------------------- + +Currently IRInterpreter implements a portion of the LLVM IR, but it doesn't +handle vector data types and there are plenty of instructions it also doesn't +support. Conversely, lli supports most of LLVM's IR but it doesn't handle +remote memory and its function calling support is very rudimentary. It would be +useful to unify these and make the IR interpreter -- both for LLVM and LLDB -- +better. An alternate strategy would be simply to JIT into the current process +but have callbacks for non-stack memory access. Index: docs/status/status.rst =================================================================== --- /dev/null +++ docs/status/status.rst @@ -0,0 +1,65 @@ +Status +====== + +macOS +----- + +LLDB has matured a lot in the last year and can be used for C, C++ and +Objective-C development for x86_64, i386 and ARM debugging. The entire public +API is exposed through a framework on Mac OS X which is used by Xcode, the lldb +command line tool, and can also be used by Python. The entire public API is +exposed through script bridging which allows LLDB to use an embedded Python +script interpreter, as well as having a Python module named "lldb" which can be +used from Python on the command line. This allows debug sessions to be +scripted.
It also allows powerful debugging actions to be created and attached +to a variety of debugging workflows. + +Linux +----- + +LLDB is improving on Linux. While the debugserver has not been ported (to +enable remote debugging) Linux is nearing feature completeness with Darwin to +debug x86_64 programs, and is partially working with i386 programs. ARM +architectures on Linux are untested. For more details, see the Features by OS +section below. + +FreeBSD +------- + +LLDB on FreeBSD lags behind the Linux implementation but is improving rapidly. +For more details, see the Features by OS section below. + +Windows +------- + +LLDB on Windows is still under development, but already useful for i386 +programs (x86_64 untested) built with DWARF debug information, including +postmortem analysis of minidumps. For more details, see the Features by OS +section below. + +Features Matrix +--------------- ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Feature | FreeBSD | Linux | Mac OS X (i386/x86_64 and ARM/Thumb) | Windows (i386) | +| | (x86_64) | (x86_64 and PPC64le) | | | ++================================+============+=========================+======================================+======================+ +| Backtracing | OK | OK | OK | OK | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Breakpoints | OK | OK | OK | OK | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| C++11: | OK | OK | OK | Unknown | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Commandline lldb tool | OK | OK | OK | OK | 
++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Core file debugging | OK (ELF) | OK (ELF) | OK (MachO) | OK (Minidump) | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Debugserver (remote debugging) | Not ported | Not ported | OK | Not ported | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Disassembly | OK | OK | OK | OK | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Expression evaluation | Unknown | Works with some bugs | OK | Works with some bugs | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| JIT debugging | Unknown | Symbolic debugging only | Untested | No | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ +| Objective-C 2.0: | Unknown | Not applicable | OK | Not applicable | ++--------------------------------+------------+-------------------------+--------------------------------------+----------------------+ Index: docs/use/architecture.rst =================================================================== --- /dev/null +++ docs/use/architecture.rst @@ -0,0 +1,192 @@ +Architecture +============ + +LLDB is a large and complex codebase. This section will help you become more +familiar with the pieces that make up LLDB and give a general overview of the +general architecture. + +LLDB has many code groupings that makeup the source base: + +.. contents:: + :local: + +API +--- + +The API folder contains the public interface to LLDB. + +We are currently vending a C++ API. 
In order to be able to add methods to this +API and allow people to link to our classes, we have certain rules that we must +follow: + +- Classes can't inherit from any other classes. +- Classes can't contain virtual methods. +- Classes should be compatible with script bridging utilities like swig. +- Classes should be lightweight and be backed by a single member. Pointers (or + shared pointers) are the preferred choice since they allow changing the + contents of the backend without affecting the public object layout. +- The interface should be as minimal as possible in order to give a complete + API. + +By adhering to these rules we should be able to continue to vend a C++ API, and +make changes to the API as any additional methods added to these classes will +just be a dynamic loader lookup and they won't affect the class layout (since +they aren't virtual methods, and no members can be added to the class). + +Breakpoint +---------- + +A collection of classes that implement our breakpoint classes. Breakpoints are +resolved symbolically and always continue to resolve themselves as your program +runs. Whether setting breakpoints by file and line, by symbol name, by symbol +regular expression, or by address, breakpoints will keep trying to resolve new +locations each time shared libraries are loaded. Breakpoints will of course +unresolve themselves when shared libraries are unloaded. Breakpoints can also +be scoped to be set only in a specific shared library. By default, breakpoints +can be set in any shared library and will continue to attempt to be resolved +with each shared library load. + +Breakpoint options can be set on the breakpoint, or on the individual +locations. This allows flexibility when dealing with breakpoints and allows us +to do what the user wants. + +Commands +-------- + +The command source files represent objects that implement the functionality for +all textual commands available in our command line interface.
+ +Every command is backed by a ``lldb_private::CommandObject`` or +``lldb_private::CommandObjectMultiword`` object. + +``lldb_private::CommandObjectMultiword`` are commands that have subcommands and +allow command line commands to be logically grouped into a hierarchy. + +``lldb_private::CommandObject`` command line commands are the objects that +implement the functionality of the command. They can optionally define options +for themselves, as well as group those options into logical groups that can go +together. The help system is tied into these objects and can extract the syntax +and option groupings to display appropriate help for each command. + +Core +---- + +The Core source files contain basic functionality that is required in the +debugger as well as the class representing the debugger itself (Debugger). A +wide variety of classes are implemented: + +- Address (section offset addressing) +- AddressRange +- Broadcaster / Event / Listener +- Communication classes that use Connection objects +- Mangled names +- Source manager +- Value objects + +Dataformatters +-------------- + +A collection of classes that implement the data formatters subsystem. + +Data formatters provide a set of user-tweakable hooks in the ValueObjects world +that allow customizing presentation aspects of variables. While users interact +with formatters mostly through the type command, inside LLDB there are a few +layers to the implementation: DataVisualization at the highest end of the +spectrum, backed by classes implementing individual formatters, matching rules, +etc. + +For a general user-level introduction to data formatters, you can look here. + +More details on the architecture are to be found here. + +Expression +---------- + +Expression parsing files cover everything from evaluating DWARF expressions, to +evaluating expressions using Clang.
+ +The DWARF expression parser has been heavily modified to support type +promotion, new opcodes needed for evaluating expressions with symbolic variable +references (expression local variables, program variables), and other operators +required by typical expressions such as assign, address of, float/double/long +double floating point values, casting, and more. The DWARF expression parser +uses a stack of lldb_private::Value objects. These objects know how to do the +standard C type promotion, and allow for symbolic references to variables in +the program and in the LLDB process (expression local and expression global +variables). + +The expression parser uses a full instance of the Clang compiler in order to +accurately evaluate expressions. Hooks have been put into Clang so that the +compiler knows to ask about identifiers it doesn't know about. Once expressions +have been compiled into an AST, we can then traverse this AST and either generate +a DWARF expression that contains simple opcodes that can be quickly +re-evaluated each time an expression needs to be evaluated, or JIT'ed up into +code that can be run on the process being debugged. + +Host +---- + +LLDB tries to abstract itself from the host upon which it is currently running +by providing a host abstraction layer. This layer includes functionality whose +implementation varies wildly from host to host. + +Host functionality includes abstraction layers for: + +- Information about the host system (triple, list of running processes, etc.) +- Launching processes +- Various OS primitives like pipes and sockets + +It also includes the base classes of the NativeProcess/Thread hierarchy, which +is used by lldb-server. + +Interpreter +----------- + +The interpreter classes are the classes responsible for being the base classes +needed for each command object, and are responsible for tracking and running +command line commands.
+ +Symbol +------ + +Symbol classes involve everything needed in order to parse object files and +debug symbols. All the needed classes for compilation units (code and debug +info for a source file), functions, lexical blocks within functions, inlined +functions, types, declaration locations, and variables are in this section. + +Target +------ + +Classes that are related to a debug target include: + +- Target +- Process +- Thread +- Stack frames +- Stack frame registers +- ABI for function calling in process being debugged +- Execution context batons + +Utility +------- + +This module contains the lowest layers of LLDB. A lot of these classes don't +really have anything to do with debugging -- they are just there because the +higher layers of the debugger use these classes to implement their +functionality. Others are data structures used in many other parts of the +debugger (TraceOptions). Most of the functionality in this module could be +useful in an application that is not a debugger; however, providing a general +purpose C++ library is an explicit non-goal of this module. + +This module provides the following functionality: + +- Abstract path manipulation (FileSpec) +- Architecture specification +- Data buffers (DataBuffer, DataEncoder, DataExtractor) +- Logging +- Structured data manipulation (JSON) +- Streams +- Timers + +For historic reasons, some of this functionality overlaps that which is +provided by the LLVM support library. Index: docs/use/formatting.rst =================================================================== --- /dev/null +++ docs/use/formatting.rst @@ -0,0 +1,297 @@ +Stack Frame and Thread Format +============================= + +.. contents:: + :local: + +LLDB has a facility to allow users to define the format of the information that +generates the descriptions for threads and stack frames.
Typically when your +program stops at a breakpoint you will get two lines that describe why your +thread stopped and where: + +:: + + * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 + frame #0: test`main at test.c:5 + +Stack backtrace frames also have a similar information line: + +:: + + (lldb) thread backtrace + * thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1 + frame #0: 0x0000000100000e85 a.out`main + 4 at test.c:19 + frame #1: 0x0000000100000e40 a.out`start + 52 + +The two format strings that govern the printing in these output forms can +currently be set using the settings set command: + +:: + + (lldb) settings set thread-stop-format STRING + (lldb) settings set frame-format STRING + +The first of these is an abbreviated thread output, that just contains data +about the thread, and not the stop frame. It will always get used in situations +where the frame output follows immediately, so that information would be +redundant. The second is the frame printing. + +There is another thread format used for commands like thread list where the +thread information isn't followed by frame info. In that case, it is convenient +to have frame zero information in the thread output. That format is set by: + +:: + + (lldb) settings set thread-format STRING + + +Format Strings +-------------- + +So what is the format of the format strings? Format strings can contain plain +text, control characters and variables that have access to the current program +state. + +Normal characters are any text that doesn't contain a ``{``, ``}``, ``$``, or +``\`` character. + +Variable names are found in between a ``${`` prefix, and end with a ``}`` +suffix. In other words, a variable looks like ``${frame.pc}``.
+ +Variables +--------- + +A complete list of currently supported format string variables is listed below: + ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **Variable Name** | **Description** | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``file.basename`` | The current compile unit file basename for the current frame. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``file.fullpath`` | The current compile unit file fullpath for the current frame. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``language`` | The current compile unit language for the current frame. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.index`` | The frame index (0, 1, 2, 3...) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.no-debug`` | Evaluates to true if the frame has no debug info. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.pc`` | The generic frame register for the program counter. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.sp`` | The generic frame register for the stack pointer. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.fp`` | The generic frame register for the frame pointer. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.flags`` | The generic frame register for the flags register. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``frame.reg.NAME`` | Access to any platform specific register by name (replace ``NAME`` with the name of the desired register). | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.name`` | The name of the current function or symbol. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.name-with-args`` | The name of the current function with arguments and values or the symbol name. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.name-without-args`` | The name of the current function without arguments and values (used to include a function name in-line in the ``disassembly-format``) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.pc-offset`` | The program counter offset within the current function or symbol | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.addr-offset`` | The offset in bytes of the current function, formatted as " + dddd" | 
++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.concrete-only-addr-offset-no-padding`` | Similar to ``function.addr-offset`` except that there are no spaces in the output (e.g. "+dddd") and the offset is computed from the nearest concrete function -- inlined functions are not included | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.changed`` | Will evaluate to true when the line being formatted is a different symbol context from the previous line (may be used in ``disassembly-format`` to print the new function name on a line by itself at the start of a new function). 
Inlined functions are not considered for this variable | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``function.initial-function`` | Will evaluate to true if this is the start of the first function, as opposed to a change of functions (may be used in ``disassembly-format`` to print the function name for the first function being disassembled) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``line.file.basename`` | The line table entry basename to the file for the current line entry in the current frame. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``line.file.fullpath`` | The line table entry fullpath to the file for the current line entry in the current frame. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``line.number`` | The line table entry line number for the current line entry in the current frame. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``line.start-addr`` | The line table entry start address for the current line entry in the current frame. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``line.end-addr`` | The line table entry end address for the current line entry in the current frame. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``module.file.basename`` | The basename of the current module (shared library or executable) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``module.file.fullpath`` | The fullpath of the current module (shared library or executable) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``process.file.basename`` | The basename of the file for the process | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``process.file.fullpath`` | The fullpath of the file for the process |
++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``process.id`` | The process ID native to the system on which the inferior runs. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``process.name`` | The name of the process at runtime | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.id`` | The thread identifier for the current thread | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.index`` | The unique one based thread index ID which is guaranteed to be unique as threads come and go. 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.name`` | The name of the thread if the target OS supports naming threads | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.queue`` | The queue name of the thread if the target OS supports dispatch queues | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.stop-reason`` | A textual reason each thread stopped | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.return-value`` | The return value of the latest step operation (currently only for step-out.) 
| ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``thread.completed-expression`` | The expression result for a thread that just finished an interrupted expression evaluation. | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``target.arch`` | The architecture of the current target | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``script.target:python_func`` | Use a Python function to generate a piece of textual output | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``script.process:python_func`` | Use a Python function to generate a piece of textual output | 
++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``script.thread:python_func`` | Use a Python function to generate a piece of textual output | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``script.frame:python_func`` | Use a Python function to generate a piece of textual output | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``current-pc-arrow`` | Prints either ``->`` or `` `` if the current pc value is matched (used in ``disassembly-format``) | ++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``addr-file-or-load`` | Formats an address either as a load address, or if the process has not yet been launched, as a file address (used in ``disassembly-format``) |
++---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Control Characters +------------------ + +Control characters include ``{``, ``}``, and ``\``. + +The ``{`` and ``}`` are used for scoping blocks, and the ``\`` character allows +you to desensitize control characters and also emit non-printable characters. + +Desensitizing Characters in the Format String +--------------------------------------------- + +The backslash control character allows you to enter the typical ``\a``, +``\b``, ``\f``, ``\n``, ``\r``, ``\t``, ``\v``, ``\\``, characters along +with the standard octal representation ``\0123`` and hex ``\xAB`` characters. +This allows you to enter escape characters into your format strings and will +allow colorized output for terminals that support color. + +Scoping +------- + +Many times the information that you might have in your prompt might not be +available and you won't want it to print out if it isn't valid. To take care +of this you can enclose everything that must resolve into a scope. A scope +starts with ``{`` and ends with ``}``.
For example, in order to only display the +current frame line table entry basename and line number when the information is +available for the current frame: + +:: + + "{ at ${line.file.basename}:${line.number}}" + + +Broken down this is: + +- Start the scope: ``{`` , +- format whose content will only be displayed if all information is available: ``at ${line.file.basename}:${line.number}`` +- end the scope: ``}`` + +Making the Frame Format +----------------------- + +The information that we see when stopped in a frame: + +:: + + frame #0: 0x0000000100000e85 a.out`main + 4 at test.c:19 + +can be displayed with the following format: + +:: + + "frame #${frame.index}: ${frame.pc}{ ${module.file.basename}`${function.name}{${function.pc-offset}}}{ at ${line.file.basename}:${line.number}}\n" + +This breaks down to: + +- Always print the frame index and frame PC: ``frame #${frame.index}: ${frame.pc}``, +- only print the module followed by a tick if there is a valid module for the current frame: ``{ ${module.file.basename}`}``, +- print the function name with optional offset: ``{${function.name}{${function.pc-offset}}}``, +- print the line info if it is available: ``{ at ${line.file.basename}:${line.number}}``, +- then finish off with a newline: ``\n``. + +Making Your own Formats +----------------------- + +When modifying your own format strings, it is useful to start with the default +values for the frame and thread format strings.
These can be accessed with the +``settings show`` command: + +:: + + (lldb) settings show thread-format + thread-format (format-string) = "thread #${thread.index}: tid = ${thread.id%tid}{, ${frame.pc}}{ ${module.file.basename}{`${function.name-with-args}{${frame.no-debug}${function.pc-offset}}}}{ at ${line.file.basename}:${line.number}}{, name = '${thread.name}'}{, queue = '${thread.queue}'}{, activity = '${thread.info.activity.name}'}{, ${thread.info.trace_messages} messages}{, stop reason = ${thread.stop-reason}}{\nReturn value: ${thread.return-value}}{\nCompleted expression: ${thread.completed-expression}}\n" + (lldb) settings show frame-format + frame-format (format-string) = "frame #${frame.index}:{ ${frame.no-debug}${frame.pc}}{ ${module.file.basename}{`${function.name-with-args}{${frame.no-debug}${function.pc-offset}}}}{ at ${line.file.basename}:${line.number}}{${function.is-optimized} [opt]}\n" + +When making thread formats, you will need to surround any of the information that +comes from a stack frame with scopes ({ frame-content }) as the thread format +doesn't always want to show frame information.
When displaying the backtrace +for a thread, we don't need to duplicate the information for frame zero in the +thread information: + +:: + + (lldb) thread backtrace + thread #1: tid = 0x2e03, stop reason = breakpoint 1.1 2.1 + frame #0: 0x0000000100000e85 a.out`main + 4 at test.c:19 + frame #1: 0x0000000100000e40 a.out`start + 52 + +The frame related variables are: + +- ``${file.*}`` +- ``${frame.*}`` +- ``${function.*}`` +- ``${line.*}`` +- ``${module.*}`` + + +Looking at the default format for the thread, and underlining the frame +information: + +:: + + thread #${thread.index}: tid = ${thread.id}{, ${frame.pc}}{ ${module.file.basename}`${function.name}{${function.pc-offset}}}{, stop reason = ${thread.stop-reason}}{, name = ${thread.name}}{, queue = ${thread.queue}}\n + + +We can see that all frame information is contained in scopes so that when the +thread information is displayed in a context where we only want to show thread +information, we can do so. + +For both thread and frame formats, you can use ${script.target:python_func}, +${script.process:python_func} and ${script.thread:python_func} (and of course +${script.frame:python_func} for frame formats). In all cases, the signature of +python_func is expected to be: + +:: + + def python_func(object,unused): + ... + return string + +Where object is an instance of the SB class associated to the keyword you are +using. + +For example, assuming your function looks like: + +:: + + def thread_printer_func (thread,unused): + return "Thread %s has %d frames\n" % (thread.name, thread.num_frames) + +And you set it up with: + +:: + + (lldb) settings set thread-format "${script.thread:thread_printer_func}" + +you would see output like: + +:: + + * Thread main has 21 frames + Index: docs/use/map.rst =================================================================== --- /dev/null +++ docs/use/map.rst @@ -0,0 +1,867 @@ +GDB to LLDB command map +======================= + +Below is a table of GDB commands with the LLDB counterparts.
The built-in +GDB-compatibility aliases in LLDB are also listed. The full lldb command names +are often long, but any unique short form can be used. Instead of "**breakpoint +set**", "**br se**" is also acceptable. + +.. contents:: + :local: + +Execution Commands +------------------ + ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| GDB | LLDB | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Launch a process with no arguments. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** run | **(lldb)** process launch | +| | | +| | | +| **(gdb)** r | **(lldb)** run | +| | | +| | | +| | **(lldb)** r | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Launch a process with arguments ``<args>``. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** run <args> | **(lldb)** process launch -- <args> | +| | | +| **(gdb)** r <args> | **(lldb)** r <args> | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Launch a process with arguments **a.out 1 2 3** without having to supply the args every time. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **%** gdb --args a.out 1 2 3 | **%** lldb -- a.out 1 2 3 | +| | | +| | | +| **(gdb)** run | **(lldb)** run | +| | | +| ... | ... | +| | | +| | | +| **(gdb)** run | **(lldb)** run | +| | | +| ... | ...
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Or: | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** set args 1 2 3 | **(lldb)** settings set target.run-args 1 2 3 | +| | | +| | | +| **(gdb)** run | **(lldb)** run | +| | | +| ... | ... | +| | | +| | | +| **(gdb)** run | **(lldb)** run | +| | | +| ... | ... | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Launch a process with arguments in a new terminal window (Mac OS X only). | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| | **(lldb)** process launch --tty -- <args> | +| | | +| | **(lldb)** pro la -t -- <args> | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Launch a process with arguments in an existing terminal | +| /dev/ttys006 (Mac OS X only). | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| | **(lldb)** process launch --tty=/dev/ttys006 -- <args> | +| | | +| | **(lldb)** pro la -t/dev/ttys006 -- <args> | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Set environment variables for process before launching.
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** set env DEBUG 1 | **(lldb)** settings set target.env-vars DEBUG=1 | +| | | +| | | +| | **(lldb)** set se target.env-vars DEBUG=1 | +| | | +| | | +| | **(lldb)** env DEBUG=1 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Unset environment variables for process before launching. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** unset env DEBUG | **(lldb)** settings remove target.env-vars DEBUG | +| | | +| | | +| | **(lldb)** set rem target.env-vars DEBUG | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Show the arguments that will be or were passed to the program when run. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** show args | **(lldb)** settings show target.run-args | +| | | +| Argument list to give program being debugged when it is started is "1 2 3". | target.run-args (array of strings) = | +| | | +| | [0]: "1" | +| | | +| | [1]: "2" | +| | | +| | [2]: "3" | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Set environment variables for process and launch process in one command. 
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| | **(lldb)** process launch -v DEBUG=1 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Attach to a process with process ID 123. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** attach 123 | **(lldb)** process attach --pid 123 | +| | | +| | | +| | **(lldb)** attach -p 123 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Attach to a process named "a.out". | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** attach a.out | **(lldb)** process attach --name a.out | +| | | +| | | +| | **(lldb)** pro at -n a.out | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Wait for a process named "a.out" to launch and attach. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** attach -waitfor a.out | **(lldb)** process attach --name a.out --waitfor | +| | | +| | | +| | **(lldb)** pro at -n a.out -w | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Attach to a remote gdb protocol server running on system "eorgadd", port 8000. 
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** target remote eorgadd:8000 | **(lldb)** gdb-remote eorgadd:8000 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Attach to a remote gdb protocol server running on the local system, port 8000. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** target remote localhost:8000 | **(lldb)** gdb-remote 8000 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Attach to a Darwin kernel in kdp mode on system "eorgadd". | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** kdp-reattach eorgadd | **(lldb)** kdp-remote eorgadd | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Do a source level single step in the currently selected thread. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** step | **(lldb)** thread step-in | +| | | +| | | +| **(gdb)** s | **(lldb)** step | +| | | +| | | +| | **(lldb)** s | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Do a source level single step over in the currently selected thread. 
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** next | **(lldb)** thread step-over | +| | | +| | | +| **(gdb)** n | **(lldb)** next | +| | | +| | | +| | **(lldb)** n | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Do an instruction level single step in the currently selected thread. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** stepi | **(lldb)** thread step-inst | +| | | +| | | +| **(gdb)** si | **(lldb)** si | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Do an instruction level single step over in the currently selected thread. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** nexti | **(lldb)** thread step-inst-over | +| | | +| | | +| **(gdb)** ni | **(lldb)** ni | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Step out of the currently selected frame. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** finish | **(lldb)** thread step-out | +| | | +| | | +| | **(lldb)** finish | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Return immediately from the currently selected frame, with an optional return value. 
| ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** return | **(lldb)** thread return | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Backtrace and disassemble every time you stop. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| | **(lldb)** target stop-hook add | +| | | +| | Enter your stop hook command(s). Type 'DONE' to end. | +| | | +| | > bt | +| | | +| | > disassemble --pc | +| | | +| | > DONE | +| | | +| | Stop hook #1 added. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| Run until we hit line **12** or control leaves the current function. | ++------------------------------------------------------------------------------+-------------------------------------------------------+ +| **(gdb)** until 12 | **(lldb)** thread until 12 | ++------------------------------------------------------------------------------+-------------------------------------------------------+ + +Breakpoint Commands +------------------- + ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| GDB | LLDB | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint at all functions named **main**. 
| ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break main | **(lldb)** breakpoint set --name main | +| | | +| | | +| | **(lldb)** br s -n main | +| | | +| | | +| | **(lldb)** b main | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint in file **test.c** at line **12**. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break test.c:12 | **(lldb)** breakpoint set --file test.c --line 12 | +| | | +| | | +| | **(lldb)** br s -f test.c -l 12 | +| | | +| | | +| | **(lldb)** b test.c:12 | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint at all C++ methods whose basename is **main**. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break main | **(lldb)** breakpoint set --method main | +| | | +| | | +| *(Hope that there are no C functions named **main**)*. | **(lldb)** br s -M main | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint at an Objective-C method: **-[NSString stringWithFormat:]**.
| ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break -[NSString stringWithFormat:] | **(lldb)** breakpoint set --name "-[NSString stringWithFormat:]" | +| | | +| | | +| | **(lldb)** b -[NSString stringWithFormat:] | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint at all Objective-C methods whose selector is **count**. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break count | **(lldb)** breakpoint set --selector count | +| | | +| | | +| *(Hope that there are no C or C++ functions named **count**)*. | **(lldb)** br s -S count | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint by regular expression on function name. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** rbreak regular-expression | **(lldb)** breakpoint set --func-regex regular-expression | +| | | +| | | +| | **(lldb)** br s -r regular-expression | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Ensure that breakpoints by file and line work for #included .c/.cpp/.m files. 
| ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** b foo.c:12 | **(lldb)** settings set target.inline-breakpoint-strategy always | +| | | +| | | +| | **(lldb)** br s -f foo.c -l 12 | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a breakpoint by regular expression on source file contents. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** shell grep -e -n pattern source-file | **(lldb)** breakpoint set --source-pattern regular-expression --file SourceFile | +| | | +| | | +| **(gdb)** break source-file:CopyLineNumbers | **(lldb)** br s -p regular-expression -f file | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Set a conditional breakpoint | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** break foo if strcmp(y,"hello") == 0 | **(lldb)** breakpoint set --name foo --condition '(int)strcmp(y,"hello") == 0' | +| | | +| | | +| | **(lldb)** br s -n foo -c '(int)strcmp(y,"hello") == 0' | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| List all breakpoints. 
| ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** info break | **(lldb)** breakpoint list | +| | | +| | | +| | **(lldb)** br l | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| Delete a breakpoint. | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ +| **(gdb)** delete 1 | **(lldb)** breakpoint delete 1 | +| | | +| | | +| | **(lldb)** br del 1 | ++----------------------------------------------------------------+---------------------------------------------------------------------------------+ + +Watchpoint Commands +------------------- + ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| GDB | LLDB | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Set a watchpoint on a variable when it is written to. 
| ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** watch global_var | **(lldb)** watchpoint set variable global_var | +| | | +| | | +| | **(lldb)** wa s v global_var | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Set a watchpoint on a memory location when it is written into. The size of the region to watch for defaults to the pointer size if no '-x byte_size' is specified. This command takes raw input, evaluated as an expression returning an unsigned integer pointing to the start of the region, after the '--' option terminator. | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** watch -location g_char_ptr | **(lldb)** watchpoint set expression -- my_ptr | +| | | +| | | +| | **(lldb)** wa s e -- my_ptr | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Set a condition on a watchpoint. 
| ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** watch set var global | +| | | +| | | +| | **(lldb)** watchpoint modify -c '(global==5)' | +| | | +| | | +| | **(lldb)** c | +| | | +| | ... | +| | | +| | | +| | **(lldb)** bt | +| | | +| | * thread #1: tid = 0x1c03, 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16, stop reason = watchpoint 1 | +| | | +| | frame #0: 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16 | +| | | +| | frame #1: 0x0000000100000eac a.out`main + 108 at main.cpp:25 | +| | | +| | frame #2: 0x00007fff8ac9c7e1 libdyld.dylib`start + 1 | +| | | +| | | +| | **(lldb)** frame var global | +| | | +| | (int32_t) global = 5 | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| List all watchpoints. 
| ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info break | **(lldb)** watchpoint list | +| | | +| | | +| | **(lldb)** watch l | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Delete a watchpoint. | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** delete 1 | **(lldb)** watchpoint delete 1 | +| | | +| | | +| | **(lldb)** watch del 1 | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Examining Variables +------------------- + ++------------------------+----------------------------------------------------------------------------------------+ +| GDB | LLDB | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the arguments and local variables for the current frame. 
| ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** info args | **(lldb)** frame variable | +| | | +| and | | +| | **(lldb)** fr v | +| | | +| **(gdb)** info locals | | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the local variables for the current frame. | ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** info locals | **(lldb)** frame variable --no-args | +| | | +| | | +| | **(lldb)** fr v -a | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the contents of local variable "bar". | ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** p bar | **(lldb)** frame variable bar | +| | | +| | | +| | **(lldb)** fr v bar | +| | | +| | | +| | **(lldb)** p bar | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the contents of local variable "bar" formatted as hex. | ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** p/x bar | **(lldb)** frame variable --format x bar | +| | | +| | | +| | **(lldb)** fr v -f x bar | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the contents of global variable "baz". 
| ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** p baz | **(lldb)** target variable baz | +| | | +| | | +| | **(lldb)** ta v baz | ++------------------------+----------------------------------------------------------------------------------------+ +| Show the global/static variables defined in the current source file. | ++------------------------+----------------------------------------------------------------------------------------+ +| n/a | **(lldb)** target variable | +| | | +| | | +| | **(lldb)** ta v | ++------------------------+----------------------------------------------------------------------------------------+ +| Display the variables "argc" and "argv" every time you stop. | ++------------------------+----------------------------------------------------------------------------------------+ +| **(gdb)** display argc | **(lldb)** target stop-hook add --one-liner "frame variable argc argv" | +| | | +| | | +| **(gdb)** display argv | **(lldb)** ta st a -o "fr v argc argv" | +| | | +| | | +| | **(lldb)** display argc | +| | | +| | | +| | **(lldb)** display argv | ++------------------------+----------------------------------------------------------------------------------------+ +| Display the variables "argc" and "argv" only when you stop in the function named **main**. | ++------------------------+----------------------------------------------------------------------------------------+ +| | **(lldb)** target stop-hook add --name main --one-liner "frame variable argc argv" | +| | | +| | | +| | **(lldb)** ta st a -n main -o "fr v argc argv" | ++------------------------+----------------------------------------------------------------------------------------+ +| Display the variable "\*this" only when you stop in a class named **MyClass**.
| ++------------------------+----------------------------------------------------------------------------------------+ +| | **(lldb)** target stop-hook add --classname MyClass --one-liner "frame variable \*this"| +| | | +| | | +| | **(lldb)** ta st a -c MyClass -o "fr v \*this" | ++------------------------+----------------------------------------------------------------------------------------+ + +Evaluating Expressions +---------------------- + ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| GDB | LLDB | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Evaluating a generalized expression in the current frame. | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** print (int) printf ("Print nine: %d.", 4 + 5) | **(lldb)** expr (int) printf ("Print nine: %d.", 4 + 5) | +| | | +| or if you don't want to see void returns: | or using the print alias: | +| | | +| | | +| **(gdb)** call (int) printf ("Print nine: %d.", 4 + 5) | **(lldb)** print (int) printf ("Print nine: %d.", 4 + 5) | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Creating and assigning a value to a convenience variable. 
| ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** set $foo = 5 | In lldb you evaluate a variable declaration expression as you would write it in C: | +| | | +| | | +| **(gdb)** set variable $foo = 5 | **(lldb)** expr unsigned int $foo = 5 | +| | | +| or using the print command | | +| | | +| | | +| **(gdb)** print $foo = 5 | | +| | | +| or using the call command | | +| | | +| | | +| **(gdb)** call $foo = 5 | | +| | | +| and if you want to specify the type of the variable: | | +| **(gdb)** set $foo = (unsigned int) 5 | | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Printing the ObjC "description" of an object. | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** po [SomeClass returnAnObject] | **(lldb)** expr -o -- [SomeClass returnAnObject] | +| | | +| | or using the po alias: | +| | | +| | | +| | **(lldb)** po [SomeClass returnAnObject] | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Print the dynamic type of the result of an expression. | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** set print object 1 | **(lldb)** expr -d 1 -- [SomeClass returnAnObject] | +| | | +| | | +| **(gdb)** p someCPPObjectPtrOrReference | **(lldb)** expr -d 1 -- someCPPObjectPtrOrReference | +| | | +| only works for C++ objects. 
| or set dynamic type printing to be the default: | +| | **(lldb)** settings set target.prefer-dynamic run-target | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Calling a function so you can stop at a breakpoint in the function. | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** set unwindonsignal 0 | **(lldb)** expr -i 0 -- function_with_a_breakpoint() | +| | | +| | | +| **(gdb)** p function_with_a_breakpoint() | | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| Calling a function that crashes, and stopping when the function crashes. | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ +| **(gdb)** set unwindonsignal 0 | **(lldb)** expr -u 0 -- function_which_crashes() | +| | | +| | | +| **(gdb)** p function_which_crashes() | | ++---------------------------------------------------------+------------------------------------------------------------------------------------+ + +Examining Thread State +---------------------- + ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| GDB | LLDB | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| List the threads in your program. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info threads | **(lldb)** thread list | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Select thread 1 as the default thread for subsequent commands. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** thread 1 | **(lldb)** thread select 1 | +| | | +| | | +| | **(lldb)** t 1 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the stack backtrace for the current thread. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** bt | **(lldb)** thread backtrace | +| | | +| | | +| | **(lldb)** bt | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the stack backtraces for all threads. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** thread apply all bt | **(lldb)** thread backtrace all | +| | | +| | | +| | **(lldb)** bt all | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Backtrace the first five frames of the current thread. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** bt 5 | **(lldb)** thread backtrace -c 5 | +| | | +| | | +| | **(lldb)** bt 5 (*lldb-169 and later*) | +| | | +| | | +| | **(lldb)** bt -c 5 (*lldb-168 and earlier*) | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Select a different stack frame by index for the current thread. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** frame 12 | **(lldb)** frame select 12 | +| | | +| | | +| | **(lldb)** fr s 12 | +| | | +| | | +| | **(lldb)** f 12 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| List information about the currently selected frame in the current thread. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** frame info | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Select the stack frame that called the current stack frame. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** up | **(lldb)** up | +| | | +| | | +| | **(lldb)** frame select --relative=1 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Select the stack frame that is called by the current stack frame. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** down | **(lldb)** down | +| | | +| | | +| | **(lldb)** frame select --relative=-1 | +| | | +| | | +| | **(lldb)** fr s -r-1 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Select a different stack frame using a relative offset. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** up 2 | **(lldb)** frame select --relative 2 | +| | | +| | | +| **(gdb)** down 3 | **(lldb)** fr s -r2 | +| | | +| | | +| | **(lldb)** frame select --relative -3 | +| | | +| | | +| | **(lldb)** fr s -r-3 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the general purpose registers for the current thread. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info registers | **(lldb)** register read | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Write a new decimal value '123' to the current thread register 'rax'. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** p $rax = 123 | **(lldb)** register write rax 123 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Skip 8 bytes ahead of the current program counter (instruction pointer). Note that we use backticks to evaluate an expression and insert the scalar result in LLDB. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** jump \*$pc+8 | **(lldb)** register write pc `$pc+8` | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the general purpose registers for the current thread formatted as **signed decimal**. 
LLDB tries to use the same format characters as **printf(3)** when possible. Type "help format" to see the full list of format specifiers. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** register read --format i | +| | | +| | | +| | **(lldb)** re r -f i | +| | | +| | | +| | *LLDB now supports the GDB shorthand format syntax but there can't be space after the command:* | +| | | +| | **(lldb)** register read/d | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show all registers in all register sets for the current thread. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info all-registers | **(lldb)** register read --all | +| | | +| | | +| | **(lldb)** re r -a | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the values for the registers named "rax", "rsp" and "rbp" in the current thread. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info all-registers rax rsp rbp | **(lldb)** register read rax rsp rbp | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show the values for the register named "rax" in the current thread formatted as **binary**. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** p/t $rax | **(lldb)** register read --format binary rax | +| | | +| | | +| | **(lldb)** re r -f b rax | +| | | +| | | +| | *LLDB now supports the GDB shorthand format syntax but there can't be space after the command:* | +| | | +| | **(lldb)** register read/t rax | +| | | +| | | +| | **(lldb)** p/t $rax | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Read memory from address 0xbffff3c0 and show 4 hex uint32_t values. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** x/4xw 0xbffff3c0 | **(lldb)** memory read --size 4 --format x --count 4 0xbffff3c0 | +| | | +| | | +| | **(lldb)** me r -s4 -fx -c4 0xbffff3c0 | +| | | +| | | +| | **(lldb)** x -s4 -fx -c4 0xbffff3c0 | +| | | +| | | +| | *LLDB now supports the GDB shorthand format syntax but there can't be space after the command:* | +| | | +| | **(lldb)** memory read/4xw 0xbffff3c0 | +| | | +| | | +| | **(lldb)** x/4xw 0xbffff3c0 | +| | | +| | | +| | **(lldb)** memory read --gdb-format 4xw 0xbffff3c0 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Read memory starting at the expression "argv[0]". | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** x argv[0] | **(lldb)** memory read `argv[0]` | +| | | +| | | +| | ***NOTE:** any command can inline a scalar expression result (as long as the target is stopped) using backticks around any expression:* | +| | | +| | **(lldb)** memory read --size `sizeof(int)` `argv[0]` | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Read 512 bytes of memory from address 0xbffff3c0 and save results to a local file as **text**. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** set logging on | **(lldb)** memory read --outfile /tmp/mem.txt --count 512 0xbffff3c0 | +| | | +| | | +| **(gdb)** set logging file /tmp/mem.txt | **(lldb)** me r -o/tmp/mem.txt -c512 0xbffff3c0 | +| | | +| | | +| **(gdb)** x/512bx 0xbffff3c0 | **(lldb)** x/512bx -o/tmp/mem.txt 0xbffff3c0 | +| | | +| | | +| **(gdb)** set logging off | | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Save binary memory data starting at 0x1000 and ending at 0x2000 to a file. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** dump memory /tmp/mem.bin 0x1000 0x2000 | **(lldb)** memory read --outfile /tmp/mem.bin --binary 0x1000 0x2000 | +| | | +| | | +| | **(lldb)** me r -o /tmp/mem.bin -b 0x1000 0x2000 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Get information about a specific heap allocation (available on Mac OS X only). 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** info malloc 0x10010d680 | **(lldb)** command script import lldb.macosx.heap | +| | | +| | | +| | **(lldb)** process launch --environment MallocStackLogging=1 -- [ARGS] | +| | | +| | | +| | **(lldb)** malloc_info --stack-history 0x10010d680 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Get information about a specific heap allocation and cast the result to any dynamic type that can be deduced (available on Mac OS X only) | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** command script import lldb.macosx.heap | +| | | +| | | +| | **(lldb)** malloc_info --type 0x10010d680 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Find all heap blocks that contain a pointer specified by an expression EXPR (available on Mac OS X only). 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** command script import lldb.macosx.heap | +| | | +| | | +| | **(lldb)** ptr_refs EXPR | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Find all heap blocks that contain a C string anywhere in the block (available on Mac OS X only). | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| | **(lldb)** command script import lldb.macosx.heap | +| | | +| | | +| | **(lldb)** cstr_refs CSTRING | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble the current function for the current frame. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** disassemble | **(lldb)** disassemble --frame | +| | | +| | | +| | **(lldb)** di -f | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble any functions named **main**. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** disassemble main | **(lldb)** disassemble --name main | +| | | +| | | +| | **(lldb)** di -n main | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble an address range. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** disassemble 0x1eb8 0x1ec3 | **(lldb)** disassemble --start-address 0x1eb8 --end-address 0x1ec3 | +| | | +| | | +| | **(lldb)** di -s 0x1eb8 -e 0x1ec3 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble 20 instructions from a given address. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** x/20i 0x1eb8 | **(lldb)** disassemble --start-address 0x1eb8 --count 20 | +| | | +| | | +| | **(lldb)** di -s 0x1eb8 -c 20 | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Show mixed source and disassembly for the current function for the current frame. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| n/a | **(lldb)** disassemble --frame --mixed | +| | | +| | | +| | **(lldb)** di -f -m | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble the current function for the current frame and show the opcode bytes. | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| n/a | **(lldb)** disassemble --frame --bytes | +| | | +| | | +| | **(lldb)** di -f -b | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Disassemble the current source line for the current frame. 
| ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| n/a | **(lldb)** disassemble --line | +| | | +| | | +| | **(lldb)** di -l | ++------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Executable and Shared Library Query Commands +-------------------------------------------- + ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| GDB | LLDB | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| List the main executable and all dependent shared libraries. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** info shared | **(lldb)** image list | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Look up information for a raw address in the executable or any shared libraries. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** info symbol 0x1ec4 | **(lldb)** image lookup --address 0x1ec4 | +| | | +| | | +| | **(lldb)** im loo -a 0x1ec4 | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Look up functions matching a regular expression in a binary. 
| ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** info function | This one finds debug symbols: | +| | | +| | | +| | **(lldb)** image lookup -r -n | +| | | +| | This one finds non-debug symbols: | +| | | +| | | +| | **(lldb)** image lookup -r -s | +| | | +| | Provide a list of binaries as arguments to limit the search. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Find full source line information. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** info line 0x1ec4 | This one is a bit messy at present. Do: | +| | | +| | | +| | **(lldb)** image lookup -v --address 0x1ec4 | +| | | +| | | +| | and look for the LineEntry line, which will have the full source path and line range information. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Look up information for an address in **a.out** only. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| | **(lldb)** image lookup --address 0x1ec4 a.out | +| | | +| | | +| | **(lldb)** im loo -a 0x1ec4 a.out | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Look up information for a type Point by name.
| ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** ptype Point | **(lldb)** image lookup --type Point | +| | | +| | | +| | **(lldb)** im loo -t Point | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Dump all sections from the main executable and any shared libraries. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| **(gdb)** maintenance info sections | **(lldb)** image dump sections | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Dump all sections in the **a.out** module. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| | **(lldb)** image dump sections a.out | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Dump all symbols from the main executable and any shared libraries. | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| | **(lldb)** image dump symtab | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| Dump all symbols in **a.out** and **liba.so**. 
| ++-------------------------------------+----------------------------------------------------------------------------------------------------+ +| | **(lldb)** image dump symtab a.out liba.so | ++-------------------------------------+----------------------------------------------------------------------------------------------------+ + +Miscellaneous +------------- + ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| GDB | LLDB | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Search command help for a keyword. | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** apropos keyword | **(lldb)** apropos keyword | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Echo text to the screen. 
| ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** echo Here is some text\n | **(lldb)** script print "Here is some text" | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Remap source file pathnames for the debug session. If your source files are no longer located in the same location as when the program was built --- maybe the program was built on a different computer --- you need to tell the debugger how to find the sources at their local file path instead of the build system's file path. | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** set pathname-substitutions /buildbot/path /my/path | **(lldb)** settings set target.source-map /buildbot/path /my/path | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Supply a catchall directory to search for source files in. 
| ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| **(gdb)** directory /my/path | (*No equivalent command - use the source-map instead.*) | ++----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Index: docs/use/python.rst =================================================================== --- /dev/null +++ docs/use/python.rst @@ -0,0 +1,801 @@ +Python Scripting +================ + +LLDB has been structured from the beginning to be scriptable in two +ways -- a Unix Python session can initiate/run a debug session +non-interactively using LLDB; and within the LLDB debugger tool, Python +scripts can be used to help with many tasks, including inspecting +program data, iterating over containers and determining if a breakpoint +should stop execution or continue. This document will show how to do +some of these things by going through an example, explaining how to use +Python scripting to find a bug in a program that searches for text in a +large binary tree. + +.. contents:: + :local: + +The Test Program and Input +-------------------------- + +We have a simple C program (dictionary.c) that reads in a text file, +and stores all the words from the file in a Binary Search Tree, sorted +alphabetically. It then enters a loop prompting the user for a word, +searching for the word in the tree (using Binary Search), and reporting +to the user whether or not it found the word in the tree. 
+ +The input text file we are using to test our program contains the text +for William Shakespeare's famous tragedy "Romeo and Juliet". + +The Bug +------- + +When we try running our program, we find there is a problem. While it +successfully finds some of the words we would expect to find, such as +"love" or "sun", it fails to find the word "Romeo", which MUST be in +the input text file: + +:: + + % ./dictionary Romeo-and-Juliet.txt + Dictionary loaded. + Enter search word: love + Yes! + Enter search word: sun + Yes! + Enter search word: Romeo + No! + Enter search word: ^D + % + +Using Depth First Search +------------------------ + +Our first job is to determine if the word "Romeo" actually got inserted +into the tree or not. Since "Romeo and Juliet" has thousands of words, +trying to examine our binary search tree by hand is completely +impractical. Therefore we will write a Python script to search the tree +for us. We will write a recursive Depth First Search function that +traverses the entire tree searching for a word, and maintaining +information about the path from the root of the tree to the current +node. If it finds the word in the tree, it returns the path from the +root to the node containing the word. 
This is what our DFS function in +Python would look like, with line numbers added for easy reference in +later explanations: + +:: + + 1: def DFS (root, word, cur_path): + 2: root_word_ptr = root.GetChildMemberWithName ("word") + 3: left_child_ptr = root.GetChildMemberWithName ("left") + 4: right_child_ptr = root.GetChildMemberWithName ("right") + 5: root_word = root_word_ptr.GetSummary() + 6: end = len (root_word) - 1 + 7: if root_word[0] == '"' and root_word[end] == '"': + 8: root_word = root_word[1:end] + 9: end = len (root_word) - 1 + 10: if root_word[0] == '\'' and root_word[end] == '\'': + 11: root_word = root_word[1:end] + 12: if root_word == word: + 13: return cur_path + 14: elif word < root_word: + 15: if left_child_ptr.GetValue() == None: + 16: return "" + 17: else: + 18: cur_path = cur_path + "L" + 19: return DFS (left_child_ptr, word, cur_path) + 20: else: + 21: if right_child_ptr.GetValue() == None: + 22: return "" + 23: else: + 24: cur_path = cur_path + "R" + 25: return DFS (right_child_ptr, word, cur_path) + + +Accessing & Manipulating Program Variables +------------------------------------------ + +Before we can call any Python function on any of our program's +variables, we need to get the variable into a form that Python can +access. To show you how to do this we will look at the parameters for +the DFS function. The first parameter is going to be a node in our +binary search tree, put into a Python variable. The second parameter is +the word we are searching for (a string), and the third parameter is a +string representing the path from the root of the tree to our current +node. + +The most interesting parameter is the first one, the Python variable +that needs to contain a node in our search tree. How can we take a +variable out of our program and put it into a Python variable? What +kind of Python variable will it be? The answers are to use the LLDB API +functions, provided as part of the LLDB Python module. 
When we run Python +from inside LLDB, LLDB will automatically give us our current frame +object as a Python variable, "lldb.frame". This variable has the type +"SBFrame" (see the LLDB API for more information about SBFrame +objects). One of the things we can do with a frame object is to ask it +to find and return one of its local variables. We will call the API function +"FindVariable" on the lldb.frame object to give us our dictionary +variable as a Python variable: + +:: + + root = lldb.frame.FindVariable ("dictionary") + +The line above, executed in the Python script interpreter in LLDB, asks the +current frame to find the variable named "dictionary" and return it. We then +store the returned value in the Python variable named "root". This answers the +question of HOW to get the variable, but it still doesn't explain WHAT actually +gets put into "root". If you examine the LLDB API, you will find that the +SBFrame method "FindVariable" returns an object of type SBValue. SBValue +objects are used, among other things, to wrap up program variables and values. +There are many useful methods defined in the SBValue class to allow you to get +information or child values out of SBValues. For complete information, see +the header file SBValue.h. The SBValue methods that we use in our DFS function +are ``GetChildMemberWithName()``, ``GetSummary()``, and ``GetValue()``. + + +Explaining DFS Script in Detail +------------------------------- + +Before diving into the details of this code, it would be best to give a +high-level overview of what it does. The nodes in our binary search tree were +defined to have type ``tree_node *``, which is defined as: + +:: + + typedef struct tree_node + { + const char *word; + struct tree_node *left; + struct tree_node *right; + } tree_node; + +Lines 2-11 of DFS are getting data out of the current tree node and getting +ready to do the actual search; lines 12-25 are the actual depth-first search.
+Lines 2-4 of our DFS function get the word, left and right fields out of the +current node and store them in Python variables. Since root_word_ptr is a +pointer to our word, and we want the actual word, line 5 calls GetSummary() to +get a string containing the value out of the pointer. Since GetSummary() adds +quotes around its result, lines 6-11 strip surrounding quotes off the word. + +Line 12 checks to see if the word in the current node is the one we are +searching for. If so, we are done, and line 13 returns the current path. +Otherwise, line 14 checks to see if we should go left (search word comes before +the current word). If we decide to go left, line 15 checks to see if the left +pointer child is NULL ("None" is the Python equivalent of NULL). If the left +pointer is NULL, then the word is not in this tree and we return an empty path +(line 16). Otherwise, we add an "L" to the end of our current path string, to +indicate we are going left (line 18), and then recurse on the left child (line +19). Lines 20-25 are the same as lines 14-19, except for going right rather +than going left. + +One other note: Typing something as long as our DFS function directly into the +interpreter can be difficult, as making a single typing mistake means having to +start all over. Therefore we recommend doing as we have done: Writing your +longer, more complicated script functions in a separate file (in this case +tree_utils.py) and then importing it into your LLDB Python interpreter. + + +The DFS Script in Action +------------------------ + +At this point we are ready to use the DFS function to see if the word "Romeo" +is in our tree or not. To actually use it in LLDB on our dictionary program, +you would do something like this: + +:: + + % lldb + (lldb) process attach -n "dictionary" + Architecture set to: x86_64. 
+ Process 521 stopped + * thread #1: tid = 0x2c03, 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8, stop reason = signal SIGSTOP + frame #0: 0x00007fff86c8bea0 libSystem.B.dylib`read$NOCANCEL + 8 + (lldb) breakpoint set -n find_word + Breakpoint created: 1: name = 'find_word', locations = 1, resolved = 1 + (lldb) continue + Process 521 resuming + Process 521 stopped + * thread #1: tid = 0x2c03, 0x0000000100001830 dictionary`find_word + 16 + at dictionary.c:105, stop reason = breakpoint 1.1 + frame #0: 0x0000000100001830 dictionary`find_word + 16 at dictionary.c:105 + 102 int + 103 find_word (tree_node *dictionary, char *word) + 104 { + -> 105 if (!word || !dictionary) + 106 return 0; + 107 + 108 int compare_value = strcmp (word, dictionary->word); + (lldb) script + Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. + >>> import tree_utils + >>> root = lldb.frame.FindVariable ("dictionary") + >>> current_path = "" + >>> path = tree_utils.DFS (root, "Romeo", current_path) + >>> print path + LLRRL + >>> ^D + (lldb) + +The first bit of code above shows starting lldb, attaching to the dictionary +program, and getting to the find_word function in LLDB. The interesting part +(as far as this example is concerned) begins when we enter the script command +and drop into the embedded interactive Python interpreter. We will go over this +Python code line by line. The first line + +:: + + import tree_utils + + +imports the file where we wrote our DFS function, tree_utils.py, into Python. +Notice that to import the file we leave off the ".py" extension. We can now +call any function in that file, giving it the prefix "tree_utils.", so that +Python knows where to look for the function. The line + +:: + + root = lldb.frame.FindVariable ("dictionary") + + +gets our program variable "dictionary" (which contains the binary search tree) +and puts it into the Python variable "root". 
See Accessing & Manipulating +Program Variables in Python above for more details about how this works. The +next line is + +:: + + current_path = "" + +This line initializes the current_path from the root of the tree to our current +node. Since we are starting at the root of the tree, our current path starts as +an empty string. As we go right and left through the tree, the DFS function +will append an 'R' or an 'L' to the current path, as appropriate. The line + +:: + + path = tree_utils.DFS (root, "Romeo", current_path) + +calls our DFS function (prefixing it with the module name so that Python can +find it). We pass in our binary tree stored in the variable root, the word we +are searching for, and our current path. We assign whatever path the DFS +function returns to the Python variable path. + +Finally, we want to see if the word was found or not, and if so we want to see +the path through the tree to the word. So we do + +:: + + print path + +From this we can see that the word "Romeo" was indeed found in the tree, and +the path from the root of the tree to the node containing "Romeo" is +left-left-right-right-left. + +Using Breakpoint Command Scripts +-------------------------------- + +We are halfway to figuring out what the problem is. We know the word we are +looking for is in the binary tree, and we know exactly where it is in the +binary tree. Now we need to figure out why our binary search algorithm is not +finding the word. We will do this using breakpoint command scripts. + +The idea is as follows. The binary search algorithm has two main decision +points: the decision to follow the right branch; and, the decision to follow +the left branch. We will set a breakpoint at each of these decision points, and +attach a Python breakpoint command script to each breakpoint. The breakpoint +commands will use the global path Python variable that we got from our DFS +function. 
Each time one of these decision breakpoints is hit, the script will +compare the actual decision with the decision the front of the path variable +says should be made (the first character of the path). If the actual decision +and the path agree, then the front character is stripped off the path, and +execution is resumed. In this case the user never even sees the breakpoint +being hit. But if the decision differs from what the path says it should be, +then the script prints out a message and does NOT resume execution, leaving the +user sitting at the first point where a wrong decision is being made. + +Python Breakpoint Command Scripts Are Not What They Seem +-------------------------------------------------------- + +What do we mean by that? When you enter a Python breakpoint command in LLDB, it +appears that you are entering one or more plain lines of Python. BUT LLDB then +takes what you entered and wraps it into a Python FUNCTION (just like using the +"def" Python command). It automatically gives the function an obscure, unique, +hard-to-stumble-across function name, and gives it two parameters: frame and +bp_loc. When the breakpoint gets hit, LLDB wraps up the frame object where the +breakpoint was hit, and the breakpoint location object for the breakpoint that +was hit, and puts them into Python variables for you. It then calls the Python +function that was created for the breakpoint command, and passes in the frame +and breakpoint location objects. + +So, being practical, what does this mean for you when you write your Python +breakpoint commands? It means that there are two things you need to keep in +mind: 1. If you want to access any Python variables created outside your +script, you must declare such variables to be global. If you do not declare +them as global, then the Python function will treat them as local variables, +and you will get unexpected behavior. 2. All Python breakpoint command scripts +automatically have a frame and a bp_loc variable. 
The variables are pre-loaded +by LLDB with the correct context for the breakpoint. You do not have to use +these variables, but they are there if you want them. + +The Decision Point Breakpoint Commands +-------------------------------------- + +This is what the Python breakpoint command script would look like for the +decision to go right: + +:: + + global path + if path[0] == 'R': + path = path[1:] + thread = frame.GetThread() + process = thread.GetProcess() + process.Continue() + else: + print "Here is the problem; going right, should go left!" + Just as a reminder, LLDB is going to take this script and wrap it up in a function, like this: + + + def some_unique_and_obscure_function_name (frame, bp_loc): + global path + if path[0] == 'R': + path = path[1:] + thread = frame.GetThread() + process = thread.GetProcess() + process.Continue() + else: + print "Here is the problem; going right, should go left!" + +LLDB will call the function, passing in the correct frame and breakpoint +location whenever the breakpoint gets hit. There are several things to notice +about this function. The first one is that we are accessing and updating a +piece of state (the path variable), and actually conditioning our behavior +based upon this variable. Since the variable was defined outside of our script +(and therefore outside of the corresponding function) we need to tell Python +that we are accessing a global variable. That is what the first line of the +script does. Next we check where the path says we should go and compare it to +our decision (recall that we are at the breakpoint for the decision to go +right). If the path agrees with our decision, then we strip the first character +off of the path. + +Since the decision matched the path, we want to resume execution. To do this we +make use of the frame parameter that LLDB guarantees will be there for us. We +use LLDB API functions to get the current thread from the current frame, and +then to get the process from the thread. 
Once we have the process, we tell it +to resume execution (using the Continue() API function). + +If the decision to go right does not agree with the path, then we do not resume +execution. We allow the breakpoint to remain stopped (by doing nothing), and we +print an informational message telling the user we have found the problem, and +what the problem is. + +Actually Using The Breakpoint Commands +-------------------------------------- + +Now we will look at what happens when we actually use these breakpoint commands +on our program. Doing a source list -n find_word shows us the function +containing our two decision points. Looking at the code below, we see that we +want to set our breakpoints on lines 113 and 115: + +:: + + (lldb) source list -n find_word + File: /Volumes/Data/HD2/carolinetice/Desktop/LLDB-Web-Examples/dictionary.c. + 101 + 102 int + 103 find_word (tree_node *dictionary, char *word) + 104 { + 105 if (!word || !dictionary) + 106 return 0; + 107 + 108 int compare_value = strcmp (word, dictionary->word); + 109 + 110 if (compare_value == 0) + 111 return 1; + 112 else if (compare_value < 0) + 113 return find_word (dictionary->left, word); + 114 else + 115 return find_word (dictionary->right, word); + 116 } + 117 + + +So, we set our breakpoints, enter our breakpoint command scripts, and see what happens: + +:: + + (lldb) breakpoint set -l 113 + Breakpoint created: 2: file ='dictionary.c', line = 113, locations = 1, resolved = 1 + (lldb) breakpoint set -l 115 + Breakpoint created: 3: file ='dictionary.c', line = 115, locations = 1, resolved = 1 + (lldb) breakpoint command add -s python 2 + Enter your Python command(s). Type 'DONE' to end. + > global path + > if (path[0] == 'L'): + > path = path[1:] + > thread = frame.GetThread() + > process = thread.GetProcess() + > process.Continue() + > else: + > print "Here is the problem. Going left, should go right!" + > DONE + (lldb) breakpoint command add -s python 3 + Enter your Python command(s). 
Type 'DONE' to end. + > global path + > if (path[0] == 'R'): + > path = path[1:] + > thread = frame.GetThread() + > process = thread.GetProcess() + > process.Continue() + > else: + > print "Here is the problem. Going right, should go left!" + > DONE + (lldb) continue + Process 696 resuming + Here is the problem. Going right, should go left! + Process 696 stopped + * thread #1: tid = 0x2d03, 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115, stop reason = breakpoint 3.1 + frame #0: 0x000000010000189f dictionary`find_word + 127 at dictionary.c:115 + 112 else if (compare_value < 0) + 113 return find_word (dictionary->left, word); + 114 else + -> 115 return find_word (dictionary->right, word); + 116 } + 117 + 118 void + (lldb) + + +After setting our breakpoints, adding our breakpoint commands and continuing, +we run for a little bit and then hit one of our breakpoints, printing out the +error message from the breakpoint command. Apparently at this point in the +tree, our search algorithm decided to go right, but our path says the node we +want is to the left. Examining the word at the node where we stopped, and our +search word, we see: + +:: + + (lldb) expr dictionary->word + (const char *) $1 = 0x0000000100100080 "dramatis" + (lldb) expr word + (char *) $2 = 0x00007fff5fbff108 "romeo" + +So the word at our current node is "dramatis", and the word we are searching +for is "romeo". "romeo" comes after "dramatis" alphabetically, so it seems like +going right would be the correct decision. Let's ask Python what it thinks the +path from the current node to our word is: + +:: + + (lldb) script print path + LLRRL + +According to Python we need to go left-left-right-right-left from our current +node to find the word we are looking for. 
Let's double check our tree, and see +what word it has at that node: + +:: + + (lldb) expr dictionary->left->left->right->right->left->word + (const char *) $4 = 0x0000000100100880 "Romeo" + +So the word we are searching for is "romeo" and the word at our DFS location is +"Romeo". Aha! One is uppercase and the other is lowercase: We seem to have a +case conversion problem somewhere in our program (we do). + +This is the end of our example on how you might use Python scripting in LLDB to +help you find bugs in your program. + +Source Files for The Example +---------------------------- + +The complete code for the Dictionary program (with case-conversion bug), the +DFS function and other Python script examples (tree_utils.py) used for this +example are available below. + +tree_utils.py - Example Python functions using LLDB's API, including DFS + +:: + + """ + # ===-- tree_utils.py ---------------------------------------*- Python -*-===// + # + # The LLVM Compiler Infrastructure + # + # This file is distributed under the University of Illinois Open Source + # License. See LICENSE.TXT for details. + # + # ===---------------------------------------------------------------------===// + + tree_utils.py - A set of functions for examining binary + search trees, based on the example search tree defined in + dictionary.c. These functions contain calls to LLDB API + functions, and assume that the LLDB Python module has been + imported. + + For a thorough explanation of how the DFS function works, and + for more information about dictionary.c go to + http://lldb.llvm.org/scripting.html + """ + + + def DFS(root, word, cur_path): + """ + Recursively traverse a binary search tree containing + words sorted alphabetically, searching for a particular + word in the tree. Also maintains a string representing + the path from the root of the tree to the current node. + If the word is found in the tree, return the path string. + Otherwise return an empty string. 
+ + This function assumes the binary search tree is + the one defined in dictionary.c It uses LLDB API + functions to examine and traverse the tree nodes. + """ + + # Get pointer field values out of node 'root' + + root_word_ptr = root.GetChildMemberWithName("word") + left_child_ptr = root.GetChildMemberWithName("left") + right_child_ptr = root.GetChildMemberWithName("right") + + # Get the word out of the word pointer and strip off + # surrounding quotes (added by call to GetSummary). + + root_word = root_word_ptr.GetSummary() + end = len(root_word) - 1 + if root_word[0] == '"' and root_word[end] == '"': + root_word = root_word[1:end] + end = len(root_word) - 1 + if root_word[0] == '\'' and root_word[end] == '\'': + root_word = root_word[1:end] + + # Main depth first search + + if root_word == word: + return cur_path + elif word < root_word: + + # Check to see if left child is NULL + + if left_child_ptr.GetValue() is None: + return "" + else: + cur_path = cur_path + "L" + return DFS(left_child_ptr, word, cur_path) + else: + + # Check to see if right child is NULL + + if right_child_ptr.GetValue() is None: + return "" + else: + cur_path = cur_path + "R" + return DFS(right_child_ptr, word, cur_path) + + + def tree_size(root): + """ + Recursively traverse a binary search tree, counting + the nodes in the tree. Returns the final count. + + This function assumes the binary search tree is + the one defined in dictionary.c It uses LLDB API + functions to examine and traverse the tree nodes. + """ + if (root.GetValue is None): + return 0 + + if (int(root.GetValue(), 16) == 0): + return 0 + + left_size = tree_size(root.GetChildAtIndex(1)) + right_size = tree_size(root.GetChildAtIndex(2)) + + total_size = left_size + right_size + 1 + return total_size + + + def print_tree(root): + """ + Recursively traverse a binary search tree, printing out + the words at the nodes in alphabetical order (the + search order for the binary tree). 
+ + This function assumes the binary search tree is + the one defined in dictionary.c It uses LLDB API + functions to examine and traverse the tree nodes. + """ + if (root.GetChildAtIndex(1).GetValue() is not None) and ( + int(root.GetChildAtIndex(1).GetValue(), 16) != 0): + print_tree(root.GetChildAtIndex(1)) + + print root.GetChildAtIndex(0).GetSummary() + + if (root.GetChildAtIndex(2).GetValue() is not None) and ( + int(root.GetChildAtIndex(2).GetValue(), 16) != 0): + print_tree(root.GetChildAtIndex(2)) + + +dictionary.c - Sample dictionary program, with bug + +:: + + //===-- dictionary.c ---------------------------------------------*- C -*-===// + // + // The LLVM Compiler Infrastructure + // + // This file is distributed under the University of Illinois Open Source + // License. See LICENSE.TXT for details. + // + //===---------------------------------------------------------------------===// + #include + #include + #include + #include + + typedef struct tree_node { + const char *word; + struct tree_node *left; + struct tree_node *right; + } tree_node; + + /* Given a char*, returns a substring that starts at the first + alphabet character and ends at the last alphabet character, i.e. it + strips off beginning or ending quotes, punctuation, etc. */ + + char *strip(char **word) { + char *start = *word; + int len = strlen(start); + char *end = start + len - 1; + + while ((start < end) && (!isalpha(start[0]))) + start++; + + while ((end > start) && (!isalpha(end[0]))) + end--; + + if (start > end) + return NULL; + + end[1] = '\0'; + *word = start; + + return start; + } + + /* Given a binary search tree (sorted alphabetically by the word at + each node), and a new word, inserts the word at the appropriate + place in the tree. 
*/ + + void insert(tree_node *root, char *word) { + if (root == NULL) + return; + + int compare_value = strcmp(word, root->word); + + if (compare_value == 0) + return; + + if (compare_value < 0) { + if (root->left != NULL) + insert(root->left, word); + else { + tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); + new_node->word = strdup(word); + new_node->left = NULL; + new_node->right = NULL; + root->left = new_node; + } + } else { + if (root->right != NULL) + insert(root->right, word); + else { + tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); + new_node->word = strdup(word); + new_node->left = NULL; + new_node->right = NULL; + root->right = new_node; + } + } + } + + /* Read in a text file and storea all the words from the file in a + binary search tree. */ + + void populate_dictionary(tree_node **dictionary, char *filename) { + FILE *in_file; + char word[1024]; + + in_file = fopen(filename, "r"); + if (in_file) { + while (fscanf(in_file, "%s", word) == 1) { + char *new_word = (strdup(word)); + new_word = strip(&new_word); + if (*dictionary == NULL) { + tree_node *new_node = (tree_node *)malloc(sizeof(tree_node)); + new_node->word = new_word; + new_node->left = NULL; + new_node->right = NULL; + *dictionary = new_node; + } else + insert(*dictionary, new_word); + } + } + } + + /* Given a binary search tree and a word, search for the word + in the binary search tree. */ + + int find_word(tree_node *dictionary, char *word) { + if (!word || !dictionary) + return 0; + + int compare_value = strcmp(word, dictionary->word); + + if (compare_value == 0) + return 1; + else if (compare_value < 0) + return find_word(dictionary->left, word); + else + return find_word(dictionary->right, word); + } + + /* Print out the words in the binary search tree, in sorted order. 
*/ + + void print_tree(tree_node *dictionary) { + if (!dictionary) + return; + + if (dictionary->left) + print_tree(dictionary->left); + + printf("%s\n", dictionary->word); + + if (dictionary->right) + print_tree(dictionary->right); + } + + int main(int argc, char **argv) { + tree_node *dictionary = NULL; + char buffer[1024]; + char *filename; + int done = 0; + + if (argc == 2) + filename = argv[1]; + + if (!filename) + return -1; + + populate_dictionary(&dictionary, filename); + fprintf(stdout, "Dictionary loaded.\nEnter search word: "); + while (!done && fgets(buffer, sizeof(buffer), stdin)) { + char *word = buffer; + int len = strlen(word); + int i; + + for (i = 0; i < len; ++i) + word[i] = tolower(word[i]); + + if ((len > 0) && (word[len - 1] == '\n')) { + word[len - 1] = '\0'; + len = len - 1; + } + + if (find_word(dictionary, word)) + fprintf(stdout, "Yes!\n"); + else + fprintf(stdout, "No!\n"); + + fprintf(stdout, "Enter search word: "); + } + + fprintf(stdout, "\n"); + return 0; + } + + +The text for "Romeo and Juliet" can be obtained from the Gutenberg Project +(http://www.gutenberg.org). + Index: docs/use/remote.rst =================================================================== --- /dev/null +++ docs/use/remote.rst @@ -0,0 +1,222 @@ +Remote Debugging +================ + +Remote debugging refers to the act of debugging a process which is running on a +different system, than the debugger itself. We shall refer to the system +running the debugger as the local system, while the system running the debugged +process will be the remote system. + +To enable remote debugging, LLDB employs a client-server architecture. The +client part runs on the local system and the remote system runs the server. The +client and server communicate using the gdb-remote protocol, usually +transported over TCP/IP. More information on the protocol can be found here and +the LLDB-specific extensions are documented in docs/lldb-gdb-remote.txt file +inside LLDB source repository. 
Besides the gdb-remote stub, the server part of +LLDB also consists of a platform binary, which is responsible for performing +advanced debugging operations, like copying files from/to the remote system and +can be used to execute arbitrary shell commands on the remote system. + +In order to reduce code complexity and improve remote debugging experience LLDB +on Linux and OSX uses the remote debugging stub even when debugging a process +locally. This is achieved by spawning a remote stub process locally and +communicating with it over the loopback interface. In the case of local +debugging this whole process is transparent to the user. The platform binary is +not used in this case, since no file transfers are needed. + +Preparation for Remote Debugging +-------------------------------- + +While the process of actual debugging (stepping, backtraces, evaluating +expressions) is the same as in the local case, in the case of remote debugging, +more preparation is needed as the required binaries cannot be started on the +remote system automatically. Also, if the remote system runs a different OS or +architecture, the server component needs to be compiled separately. + +**Remote system** + +On Linux and Android, all required remote functionality is contained in the +lldb-server binary. This binary combines the functionality of the platform and +gdb-remote stub. A single binary facilitates deployment and reduces code size, +since the two functions share a lot of code. The lldb-server binary is also +statically linked with the rest of LLDB (unlike lldb, which dynamically links +to liblldb.so by default), so it does not have any dependencies on the rest of +lldb. On Mac OSX and iOS, the remote-gdb functionality is implemented by the +debugserver binary, which you will need to deploy alongside lldb-server. + +The binaries mentioned above need to be present on the remote system to enable +remote debugging. 
You can either compile on the remote system directly or copy +them from the local machine. If compiling locally and the remote architecture +differs from the local one, you will need to cross-compile the correct version +of the binaries. More information on cross-compiling LLDB can be found on the +build page. + +Once the binaries are in place, you just need to run the lldb-server in +platform mode and specify the port it should listen on. For example, the +command + +:: + + remote% lldb-server platform --listen "*:1234" --server + +will start the LLDB platform and wait for incoming connections from any address +to port 1234. Specifying an address instead of * will only allow connections +originating from that address. Adding a --server parameter to the command line +will fork off a new process for every incoming connection, allowing multiple +parallel debug sessions. + +**Local system** + +On the local system, you need to let LLDB know that you intend to do remote +debugging. This is achieved through the platform command and its sub-commands. +As a first step you need to choose the correct platform plug-in for your remote +system. A list of available plug-ins can be obtained through platform list. + +:: + + local% lldb + (lldb) platform list + Available platforms: + host: Local Mac OS X user platform plug-in. + remote-freebsd: Remote FreeBSD user platform plug-in. + remote-linux: Remote Linux user platform plug-in. + remote-netbsd: Remote NetBSD user platform plug-in. + remote-windows: Remote Windows user platform plug-in. + remote-android: Remote Android user platform plug-in. + remote-ios: Remote iOS platform plug-in. + remote-macosx: Remote Mac OS X user platform plug-in. + ios-simulator: iOS simulator platform plug-in. + darwin-kernel: Darwin Kernel platform plug-in. + tvos-simulator: Apple TV simulator platform plug-in. + watchos-simulator: Apple Watch simulator platform plug-in. + remote-tvos: Remote Apple TV platform plug-in. 
+ remote-watchos: Remote Apple Watch platform plug-in. + remote-gdb-server: A platform that uses the GDB remote protocol as the communication transport. + +The default platform is the platform host which is used for local debugging. +Apart from this, the list should contain a number of plug-ins, for debugging +different kinds of systems. The remote plug-ins are prefixed with "remote-". +For example, to debug a remote Linux application: + +:: + + (lldb) platform select remote-linux + +After selecting the platform plug-in, you should receive a prompt which +confirms the selected platform, and states that you are not connected. This is +because remote plug-ins need to be connected to their remote platform +counterpart to operate. This is achieved using the platform connect command. +This command takes a number of arguments (as always, use the help command to +find out more), but normally you only need to specify the address to connect +to, e.g.: + +:: + + (lldb) platform connect connect://remote:1234 + Platform: remote-linux + Triple: x86_64-gnu-linux + Hostname: remote + Connected: yes + WorkingDir: /tmp + +Note that the platform has a working directory of /tmp. This directory will be +used as the directory that executables will be uploaded to by default when +launching a process from local. + +After this, you should be able to debug normally. You can use the process +attach to attach to an existing remote process or target create, process launch +to start a new one. The platform plugin will transparently take care of +uploading or downloading the executable in order to be able to debug. If your +application needs additional files, you can transfer them using the platform +commands: get-file, put-file, mkdir, etc. The environment can be prepared +further using the platform shell command. 
+ +**Launching a locally built process on the remote machine** + +*Install and run in the platform working directory* + +To launch a locally built process on the remote system in the platform working +directory: + +:: + + (lldb) file a.out + (lldb) run + +This will cause LLDB to create a target with the "a.out" executable that you +cross built. The "run" command will cause LLDB to upload "a.out" to the +platform's current working directory only if the file has changed. The platform +connection allows us to transfer files, but also allows us to get the MD5 +checksum of the file on the other end and only upload the file if it has +changed. LLDB will automatically launch a lldb-server in gdbremote mode to +allow you to debug this executable, connect to it and start your debug session +for you. + +*Changing the platform working directory* + +You can change the platform working directory while connected to the platform +with: + +:: + + (lldb) platform settings -w /usr/local/bin + +And you can verify it worked using "platform status": + +:: + + (lldb) platform status + Platform: remote-linux + Triple: x86_64-gnu-linux + Hostname: remote + Connected: yes + WorkingDir: /usr/local/bin + +If we run again, the program will be installed into ``/usr/local/bin``. + +*Install and run by specifying a remote install path* + +If you want the "a.out" executable to be installed into "/bin/a.out" instead of +the platform's current working directory, we can set the platform file +specification using python: + +:: + + (lldb) file a.out + (lldb) script lldb.target.module['a.out'].SetPlatformFileSpec("/bin/a.out") + (lldb) run + +Now when you run your program, the program will be uploaded to "/bin/a.out" +instead of the platform current working directory. Only the main executable is +uploaded to the remote system by default when launching the application. 
If you +have shared libraries that should also be uploaded, then you can add the +locally built shared library to the current target and set its platform file +specification: + +:: + + (lldb) file a.out + (lldb) target module add /local/build/libfoo.so + (lldb) target module add /local/build/libbar.so + (lldb) script lldb.target.module['libfoo.so'].SetPlatformFileSpec("/usr/lib/libfoo.so") + (lldb) script lldb.target.module['libbar.so'].SetPlatformFileSpec("/usr/local/lib/libbar.so") + (lldb) run + +*Attaching to a remote process* + +If you want to attach to a remote process, you can first list the processes on +the remote system: + +:: + + (lldb) platform process list + 223 matching processes were found on "remote-linux" + PID PARENT USER TRIPLE NAME + ====== ====== ========== ======================== ============================ + 68639 90652 x86_64-apple-macosx lldb + ... + +Then attaching is as simple as specifying the remote process ID: + +:: + + (lldb) attach 68639 Index: docs/use/symbolication.rst =================================================================== --- /dev/null +++ docs/use/symbolication.rst @@ -0,0 +1,406 @@ +Symbolication +============= + +.. contents:: + :local: + + +LLDB is separated into a shared library that contains the core of the debugger, +and a driver that implements debugging and a command interpreter. LLDB can be +used to symbolicate your crash logs and can often provide more information than +other symbolication programs: + +- Inlined functions +- Variables that are in scope for an address, along with their locations + +The simplest form of symbolication is to load an executable: + +:: + + (lldb) target create --no-dependents --arch x86_64 /tmp/a.out + +We use the ``--no-dependents`` flag with the ``target create`` command so that +we don't load all of the dependent shared libraries from the current system. 
+When we symbolicate, we are often symbolicating a binary that was running on +another system, and even though the main executable might reference shared +libraries in ``/usr/lib``, we often don't want to load the versions on the +current computer. + +Using the ``image list`` command will show us a list of all shared libraries +associated with the current target. As expected, we currently only have a +single binary: + +:: + + (lldb) image list + [ 0] 73431214-6B76-3489-9557-5075F03E36B4 0x0000000100000000 /tmp/a.out + /tmp/a.out.dSYM/Contents/Resources/DWARF/a.out + +Now we can look up an address: + +:: + + (lldb) image lookup --address 0x100000aa3 + Address: a.out[0x0000000100000aa3] (a.out.__TEXT.__text + 131) + Summary: a.out`main + 67 at main.c:13 + +Since we haven't specified a slide or any load addresses for individual +sections in the binary, the address that we use here is a file address. A file +address refers to a virtual address as defined by each object file. + +If we didn't use the ``--no-dependents`` option with ``target create``, we +would have loaded all dependent shared libraries: + +:: + + (lldb) image list + [ 0] 73431214-6B76-3489-9557-5075F03E36B4 0x0000000100000000 /tmp/a.out + /tmp/a.out.dSYM/Contents/Resources/DWARF/a.out + [ 1] 8CBCF9B9-EBB7-365E-A3FF-2F3850763C6B 0x0000000000000000 /usr/lib/system/libsystem_c.dylib + [ 2] 62AA0B84-188A-348B-8F9E-3E2DB08DB93C 0x0000000000000000 /usr/lib/system/libsystem_dnssd.dylib + [ 3] C0535565-35D1-31A7-A744-63D9F10F12A4 0x0000000000000000 /usr/lib/system/libsystem_kernel.dylib + ... 
+ +Now if we do a lookup using a file address, this can result in multiple matches +since most shared libraries have a virtual address space that starts at zero: + +:: + + (lldb) image lookup -a 0x1000 + Address: a.out[0x0000000000001000] (a.out.__PAGEZERO + 4096) + + Address: libsystem_c.dylib[0x0000000000001000] (libsystem_c.dylib.__TEXT.__text + 928) + Summary: libsystem_c.dylib`mcount + 9 + + Address: libsystem_dnssd.dylib[0x0000000000001000] (libsystem_dnssd.dylib.__TEXT.__text + 456) + Summary: libsystem_dnssd.dylib`ConvertHeaderBytes + 38 + + Address: libsystem_kernel.dylib[0x0000000000001000] (libsystem_kernel.dylib.__TEXT.__text + 1116) + Summary: libsystem_kernel.dylib`clock_get_time + 102 + ... + +To avoid getting multiple file address matches, you can specify the name of the +shared library to limit the search: + +:: + + (lldb) image lookup -a 0x1000 a.out + Address: a.out[0x0000000000001000] (a.out.__PAGEZERO + 4096) + +Defining Load Addresses for Sections +------------------------------------ + +When symbolicating your crash logs, it can be tedious if you always have to +adjust your crashlog-addresses into file addresses. To avoid having to do any +conversion, you can set the load address for the sections of the modules in +your target. Once you set any section load address, lookups will switch to +using load addresses. You can slide all sections in the executable by the same +amount, or set the load address for individual sections. The ``target modules +load --slide`` command allows us to set the load address for all sections. + +Below is an example of sliding all sections in a.out by adding 0x123000 to each +section's file address: + +:: + + (lldb) target create --no-dependents --arch x86_64 /tmp/a.out + (lldb) target modules load --file a.out --slide 0x123000 + + +It is often much easier to specify the actual load location of each section by +name. 
Crash logs on Mac OS X have a Binary Images section that specifies the +address of the __TEXT segment for each binary. Specifying a slide requires +that you first find the original (file) address for the __TEXT +segment, and subtract the two values. If you specify the address of the __TEXT +segment with ``target modules load section address``, you don't need to do any +calculations. To specify the load addresses of sections we can specify one or +more section name + address pairs in the ``target modules load`` command: + +:: + + (lldb) target create --no-dependents --arch x86_64 /tmp/a.out + (lldb) target modules load --file a.out __TEXT 0x100123000 + +We specified that the __TEXT section is loaded at 0x100123000. Now that we have +defined where sections have been loaded in our target, any lookups we do will +now use load addresses so we don't have to do any math on the addresses in the +crashlog backtraces; we can just use the raw addresses: + +:: + + (lldb) image lookup --address 0x100123aa3 + Address: a.out[0x0000000100000aa3] (a.out.__TEXT.__text + 131) + Summary: a.out`main + 67 at main.c:13 + +Loading Multiple Executables +---------------------------- + +You often have more than one executable involved when you need to symbolicate a +crash log. When this happens, you create a target for the main executable or +one of the shared libraries, then add more modules to the target using the +``target modules add`` command. 
+ +Let's say we have a Darwin crash log that contains the following images: + +:: + + Binary Images: + 0x100000000 - 0x100000ff7 /tmp/a.out + 0x7fff83f32000 - 0x7fff83ffefe7 <8CBCF9B9-EBB7-365E-A3FF-2F3850763C6B> /usr/lib/system/libsystem_c.dylib + 0x7fff883db000 - 0x7fff883e3ff7 <62AA0B84-188A-348B-8F9E-3E2DB08DB93C> /usr/lib/system/libsystem_dnssd.dylib + 0x7fff8c0dc000 - 0x7fff8c0f7ff7 /usr/lib/system/libsystem_kernel.dylib + +First we create the target using the main executable and then add any extra +shared libraries we want: + +:: + + (lldb) target create --no-dependents --arch x86_64 /tmp/a.out + (lldb) target modules add /usr/lib/system/libsystem_c.dylib + (lldb) target modules add /usr/lib/system/libsystem_dnssd.dylib + (lldb) target modules add /usr/lib/system/libsystem_kernel.dylib + + +If you have debug symbols in standalone files, such as dSYM files on Mac OS X, +you can specify their paths using the --symfile option for the ``target create`` +(recent LLDB releases only) and ``target modules add`` commands: + +:: + + (lldb) target create --no-dependents --arch x86_64 /tmp/a.out --symfile /tmp/a.out.dSYM + (lldb) target modules add /usr/lib/system/libsystem_c.dylib --symfile /build/server/a/libsystem_c.dylib.dSYM + (lldb) target modules add /usr/lib/system/libsystem_dnssd.dylib --symfile /build/server/b/libsystem_dnssd.dylib.dSYM + (lldb) target modules add /usr/lib/system/libsystem_kernel.dylib --symfile /build/server/c/libsystem_kernel.dylib.dSYM + +Then we set the load addresses for each __TEXT section (compare the +load addresses above and below) using the first address from the Binary Images +section for each image: + +:: + + (lldb) target modules load --file a.out __TEXT 0x100000000 + (lldb) target modules load --file libsystem_c.dylib __TEXT 0x7fff83f32000 + (lldb) target modules load --file libsystem_dnssd.dylib __TEXT 0x7fff883db000 + (lldb) target modules load --file libsystem_kernel.dylib __TEXT 0x7fff8c0dc000 + + +Now any stack backtraces that haven't been 
symbolicated can be symbolicated +using ``image lookup`` with the raw backtrace addresses. + +Given the following raw backtrace: + +:: + + Thread 0 Crashed:: Dispatch queue: com.apple.main-thread + 0 libsystem_kernel.dylib 0x00007fff8a1e6d46 __kill + 10 + 1 libsystem_c.dylib 0x00007fff84597df0 abort + 177 + 2 libsystem_c.dylib 0x00007fff84598e2a __assert_rtn + 146 + 3 a.out 0x0000000100000f46 main + 70 + 4 libdyld.dylib 0x00007fff8c4197e1 start + 1 + +We can now symbolicate the load addresses: + +:: + + (lldb) image lookup -a 0x00007fff8a1e6d46 + (lldb) image lookup -a 0x00007fff84597df0 + (lldb) image lookup -a 0x00007fff84598e2a + (lldb) image lookup -a 0x0000000100000f46 + + +Getting Variable Information +---------------------------- + +If you add the --verbose flag to the ``image lookup --address`` command, you +can get verbose information which can often include the locations of some of +your local variables: + +:: + + + (lldb) image lookup --address 0x100123aa3 --verbose + Address: a.out[0x0000000100000aa3] (a.out.__TEXT.__text + 110) + Summary: a.out`main + 50 at main.c:13 + Module: file = "/tmp/a.out", arch = "x86_64" + CompileUnit: id = {0x00000000}, file = "/tmp/main.c", language = "ISO C:1999" + Function: id = {0x0000004f}, name = "main", range = [0x0000000100000bc0-0x0000000100000dc9) + FuncType: id = {0x0000004f}, decl = main.c:9, compiler_type = "int (int, const char **, const char **, const char **)" + Blocks: id = {0x0000004f}, range = [0x100000bc0-0x100000dc9) + id = {0x000000ae}, range = [0x100000bf2-0x100000dc4) + LineEntry: [0x0000000100000bf2-0x0000000100000bfa): /tmp/main.c:13:23 + Symbol: id = {0x00000004}, range = [0x0000000100000bc0-0x0000000100000dc9), name="main" + Variable: id = {0x000000bf}, name = "path", type= "char [1024]", location = DW_OP_fbreg(-1072), decl = main.c:28 + Variable: id = {0x00000072}, name = "argc", type= "int", location = r13, decl = main.c:8 + Variable: id = {0x00000081}, name = "argv", type= "const char **", 
location = r12, decl = main.c:8 + Variable: id = {0x00000090}, name = "envp", type= "const char **", location = r15, decl = main.c:8 + Variable: id = {0x0000009f}, name = "aapl", type= "const char **", location = rbx, decl = main.c:8 + +The interesting part is the variables that are listed. The variables are the +parameters and local variables that are in scope for the address that was +specified. These variable entries have locations which are shown in bold above. +Crash logs often have register information for the first frame in each stack, +and being able to reconstruct one or more local variables can often help you +decipher more information from a crash log than you normally would be able to. +Note that this is really only useful for the first frame, and only if your +crash logs have register information for your threads. + +Using Python API to Symbolicate +------------------------------- + +All of the commands above can be done through the python script bridge. The +code below will recreate the target and add the three shared libraries that we +added in the darwin crash log example above: + +:: + + triple = "x86_64-apple-macosx" + platform_name = None + add_dependents = False + target = lldb.debugger.CreateTarget("/tmp/a.out", triple, platform_name, add_dependents, lldb.SBError()) + if target: + # Get the executable module + module = target.GetModuleAtIndex(0) + target.SetSectionLoadAddress(module.FindSection("__TEXT"), 0x100000000) + module = target.AddModule ("/usr/lib/system/libsystem_c.dylib", triple, None, "/build/server/a/libsystem_c.dylib.dSYM") + target.SetSectionLoadAddress(module.FindSection("__TEXT"), 0x7fff83f32000) + module = target.AddModule ("/usr/lib/system/libsystem_dnssd.dylib", triple, None, "/build/server/b/libsystem_dnssd.dylib.dSYM") + target.SetSectionLoadAddress(module.FindSection("__TEXT"), 0x7fff883db000) + module = target.AddModule ("/usr/lib/system/libsystem_kernel.dylib", triple, None, 
"/build/server/c/libsystem_kernel.dylib.dSYM") + target.SetSectionLoadAddress(module.FindSection("__TEXT"), 0x7fff8c0dc000) + + load_addr = 0x00007fff8a1e6d46 + # so_addr is a section offset address, or a lldb.SBAddress object + so_addr = target.ResolveLoadAddress (load_addr) + # Get a symbol context for the section offset address which includes + # a module, compile unit, function, block, line entry, and symbol + sym_ctx = so_addr.GetSymbolContext (lldb.eSymbolContextEverything) + print sym_ctx + + +Use Builtin Python Module to Symbolicate +---------------------------------------- + +LLDB includes a module in the lldb package named lldb.utils.symbolication. This module contains a lot of symbolication functions that simplify the symbolication process by allowing you to create objects that represent symbolication class objects such as: + +- lldb.utils.symbolication.Address +- lldb.utils.symbolication.Section +- lldb.utils.symbolication.Image +- lldb.utils.symbolication.Symbolicator + + +**lldb.utils.symbolication.Address** + +This class represents an address that will be symbolicated. It will cache any +information that has been looked up: module, compile unit, function, block, +line entry, symbol. It does this by having a lldb.SBSymbolContext as a member +variable. + +**lldb.utils.symbolication.Section** + +This class represents a section that might get loaded in a +lldb.utils.symbolication.Image. It has helper functions that allow you to set +it from text that might have been extracted from a crash log file. + +**lldb.utils.symbolication.Image** + +This class represents a module that might get loaded into the target we use for +symbolication. This class contains the executable path, optional symbol file +path, the triple, and the list of sections that will need to be loaded if we +choose the ask the target to load this image. Many of these objects will never +be loaded into the target unless they are needed by symbolication. 
You often +have a crash log that has 100 to 200 different shared libraries loaded, but +your crash log stack backtraces only use a few of these shared libraries. Only +the images that contain stack backtrace addresses need to be loaded in the +target in order to symbolicate. + +Subclasses of this class will want to override the +locate_module_and_debug_symbols method: + +:: + + class CustomImage(lldb.utils.symbolication.Image): + def locate_module_and_debug_symbols (self): + # Locate the module and symbol given the info found in the crash log + +Overriding this function allows clients to find the correct executable module +and symbol files as they might reside on a build server. + +**lldb.utils.symbolication.Symbolicator** + +This class coordinates the symbolication process by loading only the +lldb.utils.symbolication.Image instances that need to be loaded in order to +symbolicate a supplied address. + +**lldb.macosx.crashlog** + +lldb.macosx.crashlog is a package that is distributed on Mac OS X builds that +subclasses the above classes. This module parses the information in the Darwin +crash logs and creates symbolication objects that represent the images, the +sections and the thread frames for the backtraces. It then uses the functions +in the lldb.utils.symbolication module to symbolicate the crash logs. + +This module installs a new ``crashlog`` command into the lldb command +interpreter so that you can use it to parse and symbolicate Mac OS X crash +logs: + +:: + + (lldb) command script import lldb.macosx.crashlog + "crashlog" and "save_crashlog" command installed, use the "--help" option for detailed help + (lldb) crashlog /tmp/crash.log + ... + +The command that is installed has built-in help that shows the options that can +be used when symbolicating: + +:: + + (lldb) crashlog --help + Usage: crashlog [options] [FILE ...] 
+ +Symbolicate one or more darwin crash log files to provide source file and line +information, inlined stack frames back to the concrete functions, and +disassemble the location of the crash for the first frame of the crashed +thread. If this script is imported into the LLDB command interpreter, a +``crashlog`` command will be added to the interpreter for use at the LLDB +command line. After a crash log has been parsed and symbolicated, a target will +have been created that has all of the shared libraries loaded at the load +addresses found in the crash log file. This allows you to explore the program +as if it were stopped at the locations described in the crash log and functions +can be disassembled and lookups can be performed using the addresses found in +the crash log. + +:: + + Options: + -h, --help show this help message and exit + -v, --verbose display verbose debug info + -g, --debug display verbose debug logging + -a, --load-all load all executable images, not just the images found + in the crashed stack frames + --images show image list + --debug-delay=NSEC pause for NSEC seconds for debugger + -c, --crashed-only only symbolicate the crashed thread + -d DISASSEMBLE_DEPTH, --disasm-depth=DISASSEMBLE_DEPTH + set the depth in stack frames that should be + disassembled (default is 1) + -D, --disasm-all enabled disassembly of frames on all threads (not just + the crashed thread) + -B DISASSEMBLE_BEFORE, --disasm-before=DISASSEMBLE_BEFORE + the number of instructions to disassemble before the + frame PC + -A DISASSEMBLE_AFTER, --disasm-after=DISASSEMBLE_AFTER + the number of instructions to disassemble after the + frame PC + -C NLINES, --source-context=NLINES + show NLINES source lines of source context (default = + 4) + --source-frames=NFRAMES + show source for NFRAMES (default = 4) + --source-all show source for all threads, not just the crashed + thread + -i, --interactive parse all crash logs and enter interactive mode + + +The source for the 
"symbolication" and "crashlog" modules are available in SVN. + Index: docs/use/symbols.rst =================================================================== --- /dev/null +++ docs/use/symbols.rst @@ -0,0 +1,317 @@ +Symbols on macOS +================ + +.. contents:: + :local: + +On macOS, debug symbols are often in stand alone bundles called **dSYM** files. +These are bundles that contain DWARF debug information and other resources +related to builds and debug info. + +The DebugSymbols.framework framework helps locate dSYM files when given a UUID. +It can locate the symbols using a variety of methods: + +- Spotlight +- Explicit search paths +- Implicit search paths +- File mapped UUID paths +- Running one or more shell scripts + +DebugSymbols.framework also has global defaults that can be modified to allow +all of the debug tools (lldb, gdb, sample, CoreSymbolication.framework) to +easily find important debug symbols. The domain for the DebugSymbols.framework +defaults is **com.apple.DebugSymbols**, and the defaults can be read, written +or modified using the **defaults** shell command: + +:: + + % defaults read com.apple.DebugSymbols + % defaults write com.apple.DebugSymbols KEY ... + % defaults delete com.apple.DebugSymbols KEY + +The following is a list of the defaults key value setting pairs that can +be used to enhance symbol location: + +**DBGFileMappedPaths** + +This default can be specified as a single string, or an array of +strings. Each string represents a directory that contains file mapped +UUID values that point to dSYM files. See the "File Mapped UUID +Directories" section below for more details. Whenever +DebugSymbols.framework is asked to lookup a dSYM file, it will first +look in any file mapped UUID directories for a quick match. 
+ +:: + + % defaults write com.apple.DebugSymbols DBGFileMappedPaths -string /path/to/uuidmap1 + % defaults write com.apple.DebugSymbols DBGFileMappedPaths -array /path/to/uuidmap1 + /path/to/uuidmap2 + +**DBGShellCommands** + +This default can be specified as a single string, or an array of +strings. Specifies a shell script that will get run in order to find the +dSYM. The shell script will be run given a single UUID value as the +shell command arguments and the shell command is expected to return a +property list. See the property list format defined below. + +:: + + % defaults write com.apple.DebugSymbols DBGShellCommands -string /path/to/script1 + % defaults write com.apple.DebugSymbols DBGShellCommands -array /path/to/script1 + /path/to/script2 + +**DBGSpotlightPaths** + +Specifies the directories to limit spotlight searches to as a string or +array of strings. When any other defaults are supplied to +**com.apple.DebugSymbols**, spotlight searches will be disabled unless +this default is set to an empty array: + +:: + + # Specify an empty array to keep Spotlight searches enabled in all locations + % defaults write com.apple.DebugSymbols DBGSpotlightPaths -array + + # Specify an array of paths to limit spotlight searches to certain directories + % defaults write com.apple.DebugSymbols DBGSpotlightPaths -array /path/dir1 /path/dir2 + +Shell Script Property List Format +--------------------------------- + +Shell scripts that are specified with the **DBGShellCommands** defaults key +will be run in the order in which they are specified until a match is found. +The shell script will be invoked with a single UUID string value like +"23516BE4-29BE-350C-91C9-F36E7999F0F1". The shell script must respond with a +property list being written to STDOUT. The property list returned must contain +UUID string values as the root key values, with a dictionary for each UUID. 
The +dictionaries can contain one or more of the following keys: + ++-----------------------------------+-----------------------------------+ +| Key | Description | ++-----------------------------------+-----------------------------------+ +| **DBGArchitecture** | A textual architecture or target | +| | triple like "x86_64", "i386", or | +| | "x86_64-apple-macosx". | ++-----------------------------------+-----------------------------------+ +| **DBGBuildSourcePath** | A path prefix that was used when | +| | building the dSYM file. The debug | +| | information will contain paths | +| | with this prefix. | ++-----------------------------------+-----------------------------------+ +| **DBGSourcePath** | A path prefix for where the | +| | sources exist after the build has | +| | completed. Often when building | +| | projects, build machines will | +| | host the sources in a temporary | +| | directory while building, then | +| | move the sources to another | +| | location for archiving. If the | +| | paths in the debug info don't | +| | match where the sources are | +| | currently hosted, then specifying | +| | this path along with the | +| | **DBGBuildSourcePath** will help | +| | the developer tools always show | +| | you sources when debugging or | +| | symbolicating. | ++-----------------------------------+-----------------------------------+ +| **DBGDSYMPath** | A path to the dSYM mach-o file | +| | inside the dSYM bundle. | ++-----------------------------------+-----------------------------------+ +| **DBGSymbolRichExecutable** | A path to the symbol rich | +| | executable. Binaries are often | +| | stripped after being built and | +| | packaged into a release. If your | +| | build systems saves an unstripped | +| | executable a path to this | +| | executable can be provided. 
| ++-----------------------------------+-----------------------------------+ +| **DBGError** | If a binary can not be located | +| | for the supplied UUID, a user | +| | readable error can be returned. | ++-----------------------------------+-----------------------------------+ + +Below is a sample shell script output for a binary that contains two +architectures: + +:: + + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> + <plist version="1.0"> + <dict> + <key>23516BE4-29BE-350C-91C9-F36E7999F0F1</key> + <dict> + <key>DBGArchitecture</key> + <string>i386</string> + <key>DBGBuildSourcePath</key> + <string>/path/to/build/sources</string> + <key>DBGSourcePath</key> + <string>/path/to/actual/sources</string> + <key>DBGDSYMPath</key> + <string>/path/to/foo.dSYM/Contents/Resources/DWARF/foo</string> + <key>DBGSymbolRichExecutable</key> + <string>/path/to/unstripped/executable</string> + </dict> + <key>A40597AA-5529-3337-8C09-D8A014EB1578</key> + <dict> + <key>DBGArchitecture</key> + <string>x86_64</string> + <key>DBGBuildSourcePath</key> + <string>/path/to/build/sources</string> + <key>DBGSourcePath</key> + <string>/path/to/actual/sources</string> + <key>DBGDSYMPath</key> + <string>/path/to/foo.dSYM/Contents/Resources/DWARF/foo</string> + <key>DBGSymbolRichExecutable</key> + <string>/path/to/unstripped/executable</string> + </dict> + </dict> + </plist> + +There is no timeout imposed on a shell script when it is asked to locate a dSYM +file, so be careful to not make a shell script that has high latency or takes a +long time to download unless this is really what you want. This can slow down +debug sessions in LLDB and GDB, symbolication with CoreSymbolication or Report +Crash, with no visible feedback to the user. You can quickly return a plist +with a single **DBGError** key that indicates a timeout has been reached. You +might also want to exec new processes to do the downloads so that if you return +an error that indicates a timeout, your download can still proceed after your +shell script has exited so subsequent debug sessions can use the cached files. +It is also important to track when a current download is in progress in case +you get multiple requests for the same UUID so that you don't end up +downloading the same file simultaneously. 
Also you will want to verify the +download was successful and then and only then place the file into the cache +for tools that will cache files locally. + +Embedding UUID property lists inside the dSYM bundles +----------------------------------------------------- + +Since dSYM files are bundles, you can also place UUID info plist files inside +your dSYM bundles in the **Contents/Resources** directory. One of the main +reasons to create the UUID plists inside the dSYM bundles is that it will help +LLDB and other developer tools show you source. LLDB currently knows how to +check for these plist files so it can automatically remap the source location +information in the debug info. + +If we take the two UUID values from the returned plist above, we can split them +out and save them in the dSYM bundle: + +:: + + % ls /path/to/foo.dSYM/Contents/Resources + 23516BE4-29BE-350C-91C9-F36E7999F0F1.plist + A40597AA-5529-3337-8C09-D8A014EB1578.plist + + % cat /path/to/foo.dSYM/Contents/Resources/23516BE4-29BE-350C-91C9-F36E7999F0F1.plist + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> + <plist version="1.0"> + <dict> + <key>DBGArchitecture</key> + <string>i386</string> + <key>DBGBuildSourcePath</key> + <string>/path/to/build/sources</string> + <key>DBGSourcePath</key> + <string>/path/to/actual/sources</string> + <key>DBGDSYMPath</key> + <string>/path/to/foo.dSYM/Contents/Resources/DWARF/foo</string> + <key>DBGSymbolRichExecutable</key> + <string>/path/to/unstripped/executable</string> + <key>DBGVersion</key> + <string>3</string> + <key>DBGSourcePathRemapping</key> + <dict> + <key>/path/to/build/time/src/location1</key> + <string>/path/to/debug/time/src/location</string> + <key>/path/to/build/time/src/location2</key> + <string>/path/to/debug/time/src/location</string> + </dict> + <key>DBGSymbolRichExecutable</key> + <string>/path/to/unstripped/executable</string> + </dict> + </plist> + +Note that the output is very close to what is needed by shell script output, so +the results of your shell script will be very easy to create by +combining two plists into a single one where you take the UUID and use it as a +string key, and the value is the contents of the plist. 
+ +LLDB will read the following entries from the per-UUID plist file in the dSYM +bundle: **DBGSymbolRichExecutable**, **DBGBuildSourcePath** and +**DBGSourcePath**, and **DBGSourcePathRemapping** if **DBGVersion** is 3 or +higher. **DBGBuildSourcePath** and **DBGSourcePath** are for remapping a single +file path. For instance, if the files were in /BuildDir/SheetApp/SheetApp-37 +when built but are in /SourceDir/SheetApp/SheetApp-37 at debug time, +those two paths could be listed in those keys. If there are multiple source +path remappings, the **DBGSourcePathRemapping** dictionary can be used, where +an arbitrary number of entries may be present. **DBGVersion** should be 3 or +**DBGSourcePathRemapping** will not be read. If both **DBGSourcePathRemapping** +AND **DBGBuildSourcePath**/**DBGSourcePath** are present in the plist, the +**DBGSourcePathRemapping** entries will be used for path remapping first. This +may allow for more specific remappings in the **DBGSourcePathRemapping** +dictionary and a less specific remapping in the +**DBGBuildSourcePath**/**DBGSourcePath** pair as a last resort. + +File Mapped UUID Directories +---------------------------- + +File Mapped directories can be used for efficient dSYM file lookups for local +or remote dSYM files. The UUID is broken up by splitting the first 20 hex +digits into 4 character chunks, and a directory is created for each chunk, and +each subsequent directory is created inside the previous one. A symlink is then +created whose name is the last 12 hex digits in the deepest directory. The +symlink's value is a full path to the mach-o file inside the dSYM bundle which +contains the DWARF. Whenever DebugSymbols.framework is asked to lookup a dSYM +file, it will first look in any file mapped UUID directories for a quick match +if the defaults are appropriately set. 
+ +For example, if we take the sample UUID plist information from above, we can +create a File Mapped UUID directory cache in +**~/Library/SymbolCache/dsyms/uuids**. We can easily see how things are laid +out: + +:: + + % find ~/Library/SymbolCache/dsyms/uuids -type l + ~/Library/SymbolCache/dsyms/uuids/2351/6BE4/29BE/350C/91C9/F36E7999F0F1 + ~/Library/SymbolCache/dsyms/uuids/A405/97AA/5529/3337/8C09/D8A014EB1578 + +The last entries in these file mapped directories are symlinks to the actual +dSYM mach-o file in the dSYM bundle: + +:: + + % ls -lAF ~/Library/SymbolCache/dsyms/uuids/2351/6BE4/29BE/350C/91C9/F36E7999F0F1 + ~/Library/SymbolCache/dsyms/uuids/2351/6BE4/29BE/350C/91C9/F36E7999F0F1@ -> ../../../../../../dsyms/foo.dSYM/Contents/Resources/DWARF/foo + +Then you can also tell DebugSymbols to check this UUID file map cache using: + +:: + + % defaults write com.apple.DebugSymbols DBGFileMappedPaths ~/Library/SymbolCache/dsyms/uuids + +dSYM Locating Shell Script Tips +------------------------------- + +One possible implementation of a dSYM finding shell script is to have the +script download and cache files locally in a known location. Then create a UUID +map for each UUID value that was found in a local UUID File Map cache so the +next query for the dSYM file will be able to use the cached version. So the +shell script is used to initially download and cache the file, and subsequent +accesses will use the cache and avoid calling the shell script. 
+ +Then the defaults for DebugSymbols.framework will entail enabling your shell +script, enabling the file mapped path setting so that already downloaded dSYMs +will quickly be found without needing to run the shell script every time, and +also leaving spotlight enabled so that other normal dSYM files are still found: + +:: + + % defaults write com.apple.DebugSymbols DBGShellCommands /path/to/shellscript + % defaults write com.apple.DebugSymbols DBGFileMappedPaths ~/Library/SymbolCache/dsyms/uuids + % defaults write com.apple.DebugSymbols DBGSpotlightPaths -array + +Hopefully this helps explain how DebugSymbols.framework can help any company +implement smart symbol finding and caching with minimal overhead. Index: docs/use/troubleshooting.rst =================================================================== --- /dev/null +++ docs/use/troubleshooting.rst @@ -0,0 +1,102 @@ +Troubleshooting +=============== + +.. contents:: + :local: + +File and Line Breakpoints Are Not Getting Hit +--------------------------------------------- + +First you must make sure that your source files were compiled with debug +information. Typically this means passing -g to the compiler when compiling +your source file. + +When setting breakpoints in implementation source files (.c, .cpp, .cxx, .m, .mm, +etc.), LLDB by default will only search for compile units whose filename +matches. If your code does tricky things like using #include to include source +files: + +:: + + % cat foo.c + #include "bar.c" + #include "baz.c" + ... + +This will cause breakpoints in "bar.c" to be inlined into the compile unit for +"foo.c". 
If your code does this, or if your build system combines multiple +files in some way such that breakpoints from one implementation file will be +compiled into another implementation file, you will need to tell LLDB to always +search for inlined breakpoint locations by adding the following line to your +~/.lldbinit file: + +:: + + % echo "settings set target.inline-breakpoint-strategy always" >> ~/.lldbinit + +This tells LLDB to always look in all compile units and search for breakpoint +locations by file and line even if the implementation file doesn't match. +Setting breakpoints in header files always searches all compile units because +inline functions are commonly defined in header files and often cause multiple +breakpoints to have source line information that matches many header file +paths. + +If you set a file and line breakpoint using a full path to the source file, +like Xcode does when setting a breakpoint in its GUI on Mac OS X when you click +in the gutter of the source view, this path must match the full paths in the +debug information. If the paths mismatch, possibly due to passing in a resolved +source file path that doesn't match an unresolved path in the debug +information, this can cause breakpoints to not be resolved. Try setting +breakpoints using the file basename only. + +If you are using an IDE and you move your project in your file system and build +again, sometimes doing a clean then build will solve the issue. This will fix +the issue if some .o files didn't get rebuilt after the move as the .o files in +the build folder might still contain stale debug information with the old +source locations. + +How Do I Check If I Have Debug Symbols? 
+--------------------------------------- + +Checking if a module has any compile units (source files) is a good way to +check if there is debug information in a module: + +:: + + (lldb) file /tmp/a.out + (lldb) image list + [ 0] 71E5A649-8FEF-3887-9CED-D3EF8FC2FD6E 0x0000000100000000 /tmp/a.out + /tmp/a.out.dSYM/Contents/Resources/DWARF/a.out + [ 1] 6900F2BA-DB48-3B78-B668-58FC0CF6BCB8 0x00007fff5fc00000 /usr/lib/dyld + .... + (lldb) script lldb.target.module['/tmp/a.out'].GetNumCompileUnits() + 1 + (lldb) script lldb.target.module['/usr/lib/dyld'].GetNumCompileUnits() + 0 + +Above we can see that "/tmp/a.out" does have a compile unit, and +"/usr/lib/dyld" does not. + +We can also list the full paths to all compile units for a module using python: + +:: + + (lldb) script + Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D. + >>> m = lldb.target.module['a.out'] + >>> for i in range(m.GetNumCompileUnits()): + ... cu = m.GetCompileUnitAtIndex(i).file.fullpath + /tmp/main.c + /tmp/foo.c + /tmp/bar.c + >>> + +This can help to show the actual full path to the source files. Sometimes IDEs +will set breakpoints by full paths where the path doesn't match the full path +in the debug info and this can cause LLDB to not resolve breakpoints. You can +use the breakpoint list command with the --verbose option to see the full paths +for any source file and line breakpoints that the IDE set using: + +:: + + (lldb) breakpoint list --verbose Index: docs/use/tutorial.rst =================================================================== --- /dev/null +++ docs/use/tutorial.rst @@ -0,0 +1,673 @@ +Tutorial +======== + +Here's a short precis of how to run lldb if you are familiar with the gdb +command set. We will start with some details on lldb command structure and +syntax to help orient you. + +.. 
contents:: + :local: + +Command Structure +----------------- + +Unlike gdb's command set, which is rather free-form, we tried to make the lldb command syntax fairly structured. The commands are all of the form: + +:: + + <noun> <verb> [-options [option-value]] [argument [argument...]] + +The command line parsing is done before command execution, so it is uniform +across all the commands. The command syntax for basic commands is very simple, +arguments, options and option values are all white-space separated, and +double-quotes are used to protect white-spaces in an argument. If you need to +put a backslash or double-quote character in an argument you back-slash it in +the argument. That makes the command syntax more regular, but it also means you +may have to quote some arguments in lldb that you wouldn't in gdb. + + +Options can be placed anywhere on the command line, but if the arguments begin +with a "-" then you have to tell lldb that you're done with options for the +current command by adding an option termination: "--". So for instance if you +want to launch a process and give the "process launch" command the +"--stop-at-entry" option, yet you want the process you are about to launch to +be launched with the arguments "-program_arg value", you would type: + +:: + + (lldb) process launch --stop-at-entry -- -program_arg value + +We also tried to reduce the number of special purpose argument parsers, which +sometimes forces the user to be a little more explicit about stating their +intentions. The first instance you'll note of this is the breakpoint command. +In gdb, to set a breakpoint, you might enter + +:: + + (gdb) break foo.c:12 + +to break at line 12 of foo.c, and: + +:: + + (gdb) break foo + +to break at the function foo. 
As time went on, the parser that tells foo.c:12 +from foo from foo.c::foo (which means the function foo in the file foo.c) got +more and more complex and bizarre, and especially in C++ there are times where +there's really no way to specify the function you want to break on. The lldb +commands are more verbose but also more precise and allow for intelligent auto +completion. + +To set the same file and line breakpoint in LLDB you can enter either of: + +:: + + (lldb) breakpoint set --file foo.c --line 12 + (lldb) breakpoint set -f foo.c -l 12 + +To set a breakpoint on a function named foo in LLDB you can enter either of: + +:: + + (lldb) breakpoint set --name foo + (lldb) breakpoint set -n foo + +You can use the --name option multiple times to make a breakpoint on a set of +functions as well. This is convenient since it allows you to set common +conditions or commands without having to specify them multiple times: + +:: + + (lldb) breakpoint set --name foo --name bar + +Setting breakpoints by name is even more specialized in LLDB as you can specify +that you want to set a breakpoint at a function by method name. To set a +breakpoint on all C++ methods named foo you can enter either of: + +:: + + (lldb) breakpoint set --method foo + (lldb) breakpoint set -M foo + + +To set a breakpoint on all Objective-C selectors named alignLeftEdges: you can enter either of: + +:: + + (lldb) breakpoint set --selector alignLeftEdges: + (lldb) breakpoint set -S alignLeftEdges: + +You can limit any breakpoints to a specific executable image by using the +"--shlib <path>" ("-s <path>" for short): + +:: + + (lldb) breakpoint set --shlib foo.dylib --name foo + (lldb) breakpoint set -s foo.dylib -n foo + +The --shlib option can also be repeated to specify several shared libraries. + +Suggestions on more interesting primitives of this sort are also very welcome. 
+ +Just like gdb, the lldb command interpreter does a shortest unique string match +on command names, so the following two commands will both execute the same +command: + +:: + + (lldb) breakpoint set -n "-[SKTGraphicView alignLeftEdges:]" + (lldb) br s -n "-[SKTGraphicView alignLeftEdges:]" + +lldb also supports command completion for source file names, symbol names, file +names, etc. Completion is initiated by hitting TAB. Individual options in a +command can have different completers, so for instance the "--file <path>" +option in "breakpoint" completes to source files, the "--shlib <path>" option +to currently loaded shared libraries, etc. We can even do things like if you +specify "--shlib <path>", and are completing on "--file <path>", we will only +list source files in the shared library specified by "--shlib <path>". + +The individual commands are pretty extensively documented. You can use the help +command to get an overview of which commands are available or to obtain details +about specific commands. There is also an apropos command that will search the +help text for all commands for a particular word and dump a summary help string +for each matching command. + +Finally, there is a mechanism to construct aliases for commonly used commands. +So for instance if you get annoyed typing: + +:: + + (lldb) breakpoint set --file foo.c --line 12 + +you can do: + +:: + + (lldb) command alias bfl breakpoint set -f %1 -l %2 + (lldb) bfl foo.c 12 + +We have added a few aliases for commonly used commands (e.g. "step", "next" and +"continue") but we haven't tried to be exhaustive because in our experience it +is more convenient to make the basic commands unique down to a letter or two, +and then learn these sequences than to fill the namespace with lots of aliases, +and then have to type them all the way out. 
+ +However, users are free to customize lldb's command set however they like, and +since lldb reads the file ~/.lldbinit at startup, you can store all your +aliases there and they will be generally available to you. Your aliases are +also documented in the help command so you can remind yourself of what you've +set up. + +One alias of note that we do include by popular demand is a weak emulator of +gdb's "break" command. It doesn't try to do everything that gdb's break command +does (for instance, it doesn't handle foo.c::bar). But it mostly works, and +makes the transition easier. Also by popular demand, it is aliased to b. If you +actually want to learn the lldb command set natively, that means it will get in +the way of the rest of the breakpoint commands. Fortunately, if you don't like +one of our aliases, you can easily get rid of it by running (for example): + +:: + + (lldb) command unalias b + +I actually also do: + +:: + + (lldb) command alias b breakpoint + +so I can run the native lldb breakpoint command with just b. + +The lldb command parser also supports "raw" commands, where, after command +options are stripped off, the rest of the command string is passed +uninterpreted to the command. This is convenient for commands whose arguments +might be some complex expression that would be painful to backslash protect. +For instance the "expression" command is a "raw" command for obvious reasons. +The "help" output for a command will tell you if it is "raw" or not, so you +know what to expect. The one thing you have to watch out for is that since raw +commands still can have options, if your command string has dashes in it, +you'll have to indicate these are not option markers by putting "--" after the +command name, but before your command string. + +lldb also has a built-in Python interpreter, which is accessible by the +"script" command. 
All the functionality of the debugger is available as classes +in the Python interpreter, so the more complex commands that in gdb you would +introduce with the "define" command can be done by writing Python functions +using the lldb-Python library, then loading the scripts into your running +session and accessing them with the "script" command. + +Having given an overview of lldb's command syntax, we proceed to lay out the +stages of a standard debug session. + + +Loading a Program into lldb +--------------------------- + +First we need to set the program to debug. As with gdb, you can start lldb and specify the file you wish to debug on the command line: + +:: + + $ lldb /Projects/Sketch/build/Debug/Sketch.app + Current executable set to '/Projects/Sketch/build/Debug/Sketch.app' (x86_64). + +or you can specify it after the fact with the "file" command: + +:: + + $ lldb + (lldb) file /Projects/Sketch/build/Debug/Sketch.app + Current executable set to '/Projects/Sketch/build/Debug/Sketch.app' (x86_64). + +Setting Breakpoints +------------------- + +We've discussed how to set breakpoints above. You can use help breakpoint set +to see all the options for breakpoint setting. For instance, we might do: + +:: + + (lldb) breakpoint set --selector alignLeftEdges: + Breakpoint created: 1: name = 'alignLeftEdges:', locations = 1, resolved = 1 + +You can find out about the breakpoints you've set with: + +:: + + (lldb) breakpoint list + Current breakpoints: + 1: name = 'alignLeftEdges:', locations = 1, resolved = 1 + 1.1: where = Sketch`-[SKTGraphicView alignLeftEdges:] + 33 at /Projects/Sketch/SKTGraphicView.m:1405, address = 0x0000000100010d5b, resolved, hit count = 0 + + +Note that setting a breakpoint creates a logical breakpoint, which could +resolve to one or more locations. For instance, break by selector would set a +breakpoint on all the methods that implement that selector in the classes in +your program. 
Similarly, a file and line breakpoint might result in multiple +locations if that file and line were inlined in different places in your code. + +The logical breakpoint has an integer id, and its locations have an id within +their parent breakpoint (the two are joined by a ".", e.g. 1.1 in the example +above.) + +Also the logical breakpoints remain live so that if another shared library were +to be loaded that had another implementation of the "alignLeftEdges:" selector, +the new location would be added to breakpoint 1 (e.g. a "1.2" breakpoint would +be set on the newly loaded selector). + +The other piece of information in the breakpoint listing is whether the +breakpoint location was resolved or not. A location gets resolved when the file +address it corresponds to gets loaded into the program you are debugging. For +instance if you set a breakpoint in a shared library that then gets unloaded, +that breakpoint location will remain, but it will no longer be resolved. + + +One other thing to note for gdb users is that lldb acts like gdb with: + +:: + + (gdb) set breakpoint pending on + +That is, lldb will always make a breakpoint from your specification, even if it +couldn't find any locations that match the specification. You can tell whether +the expression was resolved or not by checking the locations field in +"breakpoint list", and we report the breakpoint as "pending" when you set it so +you can tell you've made a typo more easily, if that was indeed the reason no +locations were found: + + +:: + + (lldb) breakpoint set --file foo.c --line 12 + Breakpoint created: 2: file ='foo.c', line = 12, locations = 0 (pending) + WARNING: Unable to resolve breakpoint to any actual locations. + +You can delete, disable, set conditions and ignore counts either on all the +locations generated by your logical breakpoint, or on any one of the particular +locations your specification resolved to. 
For instance if we wanted to add a +command to print a backtrace when we hit this breakpoint we could do: + +:: + + (lldb) breakpoint command add 1.1 + Enter your debugger command(s). Type 'DONE' to end. + > bt + > DONE + +By default, the breakpoint command add command takes lldb command line +commands. You can also specify this explicitly by passing the "--command" +option. Use "--script" if you want to implement your breakpoint command using +the Python script instead. + +This is a convenient point to bring up another feature of the lldb command +help. Do: + +:: + + (lldb) help break command add + Add a set of commands to a breakpoint, to be executed whenever the breakpoint is hit. + + Syntax: breakpoint command add <cmd-options> <breakpt-id> + etc... + +When you see arguments to commands specified in the Syntax in angle brackets +like <breakpt-id>, that indicates that that is some common argument type that +you can get further help on from the command system. So in this case you could +do: + +:: + + (lldb) help <breakpt-id> <breakpt-id> -- Breakpoint ID's consist major and + minor numbers; the major etc... + + +Breakpoint Names +---------------- + +Breakpoints carry two orthogonal sets of information: one specifies where to set the breakpoint, and the other how to react when the breakpoint is hit. The latter set of information (e.g. commands, conditions, hit-count, auto-continue...) we call breakpoint options. + +It is fairly common to want to apply one set of options to a number of breakpoints. For instance, you might want to check that self == nil and if it is, print a backtrace and continue, on a number of methods. 
One convenient way to do that would be to make all the breakpoints, then configure the options with: + +:: + + (lldb) breakpoint modify -c "self == nil" -C bt --auto-continue 1 2 3 + +That's not too bad, but you have to repeat this for every new breakpoint you make, and if you wanted to change the options, you have to remember all the ones you are using this way. 
+ +Breakpoint names provide a convenient solution to this problem. The simple solution would be to use the name to gather the breakpoints you want to affect this way into a group. So when you make the breakpoint you would do: + +:: + + (lldb) breakpoint set -N SelfNil + +Then when you've made all your breakpoints, you can set up or modify the options using the name to collect all the relevant breakpoints. + +:: + + (lldb) breakpoint modify -c "self == nil" -C bt --auto-continue SelfNil + +That is better, but suffers from the problem that when new breakpoints get +added, they don't pick up these modifications, and the options only exist in +the context of actual breakpoints, so they are hard to store & reuse. + +An even better solution is to make a fully configured breakpoint name: + +:: + + (lldb) breakpoint name configure -c "self == nil" -C bt --auto-continue SelfNil + +Then you can apply the name to your breakpoints, and they will all pick up +these options. The connection from name to breakpoints remains live, so when +you change the options configured on the name, all the breakpoints pick up +those changes. This makes it easy to use configured names to experiment with +your options. + +You can make breakpoint names in your .lldbinit file, so you can use them to +can behaviors that you have found useful and reapply them in future sessions. + +You can also make a breakpoint name from the options set on a breakpoint: + +:: + + (lldb) breakpoint name configure -B 1 SelfNil + +which makes it easy to copy behavior from one breakpoint to a set of others. + +Setting Watchpoints +------------------- + +In addition to breakpoints, you can use help watchpoint to see all the commands +for watchpoint manipulations. 
For instance, we might do the following to watch +a variable called 'global' for write operation, but only stop if the condition +'(global==5)' is true: + +:: + + (lldb) watch set var global + Watchpoint created: Watchpoint 1: addr = 0x100001018 size = 4 state = enabled type = w + declare @ '/Volumes/data/lldb/svn/ToT/test/functionalities/watchpoint/watchpoint_commands/condition/main.cpp:12' + (lldb) watch modify -c '(global==5)' + (lldb) watch list + Current watchpoints: + Watchpoint 1: addr = 0x100001018 size = 4 state = enabled type = w + declare @ '/Volumes/data/lldb/svn/ToT/test/functionalities/watchpoint/watchpoint_commands/condition/main.cpp:12' + condition = '(global==5)' + (lldb) c + Process 15562 resuming + (lldb) about to write to 'global'... + Process 15562 stopped and was programmatically restarted. + Process 15562 stopped and was programmatically restarted. + Process 15562 stopped and was programmatically restarted. + Process 15562 stopped and was programmatically restarted. 
+ Process 15562 stopped + * thread #1: tid = 0x1c03, 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16, stop reason = watchpoint 1 + frame #0: 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16 + 13 + 14 static void modify(int32_t &var) { + 15 ++var; + -> 16 } + 17 + 18 int main(int argc, char** argv) { + 19 int local = 0; + (lldb) bt + * thread #1: tid = 0x1c03, 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16, stop reason = watchpoint 1 + frame #0: 0x0000000100000ef5 a.out`modify + 21 at main.cpp:16 + frame #1: 0x0000000100000eac a.out`main + 108 at main.cpp:25 + frame #2: 0x00007fff8ac9c7e1 libdyld.dylib`start + 1 + (lldb) frame var global + (int32_t) global = 5 + (lldb) watch list -v + Current watchpoints: + Watchpoint 1: addr = 0x100001018 size = 4 state = enabled type = w + declare @ '/Volumes/data/lldb/svn/ToT/test/functionalities/watchpoint/watchpoint_commands/condition/main.cpp:12' + condition = '(global==5)' + hw_index = 0 hit_count = 5 ignore_count = 0 + (lldb) + +Starting or Attaching to Your Program +------------------------------------- + +To launch a program in lldb we use the "process launch" command or one of its built in aliases: + +:: + + (lldb) process launch + (lldb) run + (lldb) r + +You can also attach to a process by process ID or process name. 
When attaching +to a process by name, lldb also supports the "--waitfor" option which waits for +the next process that has that name to show up, and attaches to it + +:: + + (lldb) process attach --pid 123 + (lldb) process attach --name Sketch + (lldb) process attach --name Sketch --waitfor + +After you launch or attach to a process, your process might stop somewhere: + +:: + + (lldb) process attach -p 12345 + Process 46915 Attaching + Process 46915 Stopped + 1 of 3 threads stopped with reasons: + * thread #1: tid = 0x2c03, 0x00007fff85cac76a, where = libSystem.B.dylib`__getdirentries64 + 10, stop reason = signal = SIGSTOP, queue = com.apple.main-thread + +Note the line that says "1 of 3 threads stopped with reasons:" and the lines +that follow it. In a multi-threaded environment it is very common for more than +one thread to hit your breakpoint(s) before the kernel actually returns control +to the debugger. In that case, you will see all the threads that stopped for +some interesting reason listed in the stop message. + +Controlling Your Program +------------------------ + +After launching, we can continue until we hit our breakpoint. The primitive commands for process control all exist under the "thread" command: + +:: + + (lldb) thread continue + Resuming thread 0x2c03 in process 46915 + Resuming process 46915 + (lldb) + +At present you can only operate on one thread at a time, but the design will ultimately support saying "step over the function in Thread 1, and step into the function in Thread 2, and continue Thread 3" etc. When we eventually support keeping some threads running while others are stopped this will be particularly important. For convenience, however, all the stepping commands have easy aliases. So "thread continue" is just "c", etc. + +The other program stepping commands are pretty much the same as in gdb. 
You've got: + +:: + + (lldb) thread step-in // The same as gdb's "step" or "s" + (lldb) thread step-over // The same as gdb's "next" or "n" + (lldb) thread step-out // The same as gdb's "finish" or "f" + +By default, lldb does define aliases to all common gdb process control commands ("s", "step", "n", "next", "finish"). If we have missed any, please add them to your ~/.lldbinit file using the "command alias" command. + +lldb also supports the step by instruction versions: + +:: + + + (lldb) thread step-inst // The same as gdb's "stepi" / "si" + (lldb) thread step-over-inst // The same as gdb's "nexti" / "ni" + +Finally, lldb has a run until line or frame exit stepping mode: + +:: + + (lldb) thread until 100 + +This command will run the thread in the current frame till it reaches line 100 +in this frame or stops if it leaves the current frame. This is a pretty close +equivalent to gdb's "until" command. + +A process, by default, will share the lldb terminal with the inferior process. +When in this mode, much like when debugging with gdb, when the process is +running anything you type will go to the STDIN of the inferior process. To +interrupt your inferior program, type CTRL+C. + +If you attach to a process, or launch a process with the "--no-stdin" option, +the command interpreter is always available to enter commands. This might be a +little disconcerting to gdb users when they always have an (lldb) prompt. This +allows you to set a breakpoint, etc without having to explicitly interrupt the +program you are debugging: + +:: + + (lldb) process continue + (lldb) breakpoint set --name stop_here + +There are many commands that won't work while running, and the command +interpreter should do a good job of letting you know when this is the case. If +you find any instances where the command interpreter isn't doing its job, +please file a bug. This way of operation will set us up for a future debugging +mode called thread centric debugging. 
This mode will allow us to run all +threads and only stop the threads that are at breakpoints or have exceptions or +signals. + +The commands that currently work while running include interrupting the process +to halt execution ("process interrupt"), getting the process status ("process +status"), breakpoint setting and clearing (" breakpoint +[set|clear|enable|disable|list] ..."), and memory reading and writing (" memory +[read|write] ..."). + +The question of disabling stdio when running brings up a good opportunity to +show how to set debugger properties in general. If you always want to run in +the --no-stdin mode, you can set this as a generic process property using the +lldb "settings" command, which is equivalent to gdb's "set" command. For +instance, in this case you would say: + +:: + + (lldb) settings set target.process.disable-stdio true + +Over time, gdb's "set" command became a wilderness of disordered options, so +that there were useful options that even experienced gdb users didn't know +about because they were too hard to find. We tried to organize the settings +hierarchically using the structure of the basic entities in the debugger. For +the most part anywhere you can specify a setting on a generic entity (threads, +for example) you can also apply the option to a particular instance, which can +also be convenient at times. You can view the available settings with "settings +list" and there is help on the settings command explaining how it works more +generally. + +Examining Thread State +---------------------- + +Once you've stopped, lldb will choose a current thread, usually the one that +stopped "for a reason", and a current frame in that thread (on stop this is +always the bottom-most frame). Many of the commands for inspecting state work on +this current thread/frame. 
+ +To inspect the current state of your process, you can start with the threads: + +:: + + (lldb) thread list + Process 46915 state is Stopped + * thread #1: tid = 0x2c03, 0x00007fff85cac76a, where = libSystem.B.dylib`__getdirentries64 + 10, stop reason = signal = SIGSTOP, queue = com.apple.main-thread + thread #2: tid = 0x2e03, 0x00007fff85cbb08a, where = libSystem.B.dylib`kevent + 10, queue = com.apple.libdispatch-manager + thread #3: tid = 0x2f03, 0x00007fff85cbbeaa, where = libSystem.B.dylib`__workq_kernreturn + 10 + +The ``*`` indicates that Thread 1 is the current thread. To get a backtrace for +that thread, do: + +:: + + (lldb) thread backtrace + thread #1: tid = 0x2c03, stop reason = breakpoint 1.1, queue = com.apple.main-thread + frame #0: 0x0000000100010d5b, where = Sketch`-[SKTGraphicView alignLeftEdges:] + 33 at /Projects/Sketch/SKTGraphicView.m:1405 + frame #1: 0x00007fff8602d152, where = AppKit`-[NSApplication sendAction:to:from:] + 95 + frame #2: 0x00007fff860516be, where = AppKit`-[NSMenuItem _corePerformAction] + 365 + frame #3: 0x00007fff86051428, where = AppKit`-[NSCarbonMenuImpl performActionWithHighlightingForItemAtIndex:] + 121 + frame #4: 0x00007fff860370c1, where = AppKit`-[NSMenu performKeyEquivalent:] + 272 + frame #5: 0x00007fff86035e69, where = AppKit`-[NSApplication _handleKeyEquivalent:] + 559 + frame #6: 0x00007fff85f06aa1, where = AppKit`-[NSApplication sendEvent:] + 3630 + frame #7: 0x00007fff85e9d922, where = AppKit`-[NSApplication run] + 474 + frame #8: 0x00007fff85e965f8, where = AppKit`NSApplicationMain + 364 + frame #9: 0x0000000100015ae3, where = Sketch`main + 33 at /Projects/Sketch/SKTMain.m:11 + frame #10: 0x0000000100000f20, where = Sketch`start + 52 + +You can also provide a list of threads to backtrace, or the keyword "all" to see all threads: + +:: + + (lldb) thread backtrace all + +You can select the current thread, which will be used by default in all the +commands in the next section, with the "thread select" command: 
+ +:: + + (lldb) thread select 2 + +where the thread index is just the one shown in the "thread list" listing. + + +Examining Stack Frame State +--------------------------- + +The most convenient way to inspect a frame's arguments and local variables is +to use the "frame variable" command: + +:: + + (lldb) frame variable + self = (SKTGraphicView *) 0x0000000100208b40 + _cmd = (struct objc_selector *) 0x000000010001bae1 + sender = (id) 0x00000001001264e0 + selection = (NSArray *) 0x00000001001264e0 + i = (NSUInteger) 0x00000001001264e0 + c = (NSUInteger) 0x00000001001253b0 + +As you see above, if you don't specify any variable names, all arguments and +locals will be shown. If you call "frame variable" passing in the names of a +particular local(s), only those variables will be printed. For instance: + +:: + + (lldb) frame variable self + (SKTGraphicView *) self = 0x0000000100208b40 + +You can also pass in a path to some subelement of one of the available locals, +and that sub-element will be printed. For instance: + +:: + + (lldb) frame variable self.isa + (struct objc_class *) self.isa = 0x0000000100023730 + +The "frame variable" command is not a full expression parser but it does +support a few simple operations like ``&``, ``*``, ``->``, ``[]`` (no +overloaded operators). The array brackets can be used on pointers to treat +pointers as arrays: + +:: + + (lldb) frame variable *self + (SKTGraphicView *) self = 0x0000000100208b40 + (NSView) NSView = { + (NSResponder) NSResponder = { + ... + + (lldb) frame variable &self + (SKTGraphicView **) &self = 0x0000000100304ab + + (lldb) frame variable argv[0] + (char const *) argv[0] = 0x00007fff5fbffaf8 "/Projects/Sketch/build/Debug/Sketch.app/Contents/MacOS/Sketch" + +The frame variable command will also perform "object printing" operations on +variables (currently we only support ObjC printing, using the object's +"description" method. 
Turn this on by passing the -o flag to frame variable: + +:: + + (lldb) frame variable -o self (SKTGraphicView *) self = 0x0000000100208b40 + You can select another frame to view with the "frame select" command + + (lldb) frame select 9 + frame #9: 0x0000000100015ae3, where = Sketch`function1 + 33 at /Projects/Sketch/SKTFunctions.m:11 + +You can also move up and down the stack by passing the "--relative" ("-r") option. And we have built-in aliases "u" and "d" which behave like their gdb equivalents. Index: docs/use/variable.rst =================================================================== --- /dev/null +++ docs/use/variable.rst @@ -0,0 +1,1135 @@ +Variable Formatting +=================== + +.. contents:: + :local: + +LLDB has a data formatters subsystem that allows users to define custom display +options for their variables. + +Usually, when you type frame variable or run some expression LLDB will +automatically choose the way to display your results on a per-type basis, as in +the following example: + +:: + + (lldb) frame variable + (uint8_t) x = 'a' + (intptr_t) y = 124752287 + +However, in certain cases, you may want to associate a different style to the display for certain datatypes. To do so, you need to give hints to the debugger +as to how variables should be displayed. The LLDB type command allows you to do +just that. + +Using it you can change your visualization to look like this: + +:: + + (lldb) frame variable + (uint8_t) x = chr='a' dec=65 hex=0x41 + (intptr_t) y = 0x76f919f + +There are several features related to data visualization: formats, summaries, +filters, synthetic children. + +To reflect this, the type command has five subcommands: + +:: + + type format + type summary + type filter + type synthetic + type category + +These commands are meant to bind printing options to types. 
When variables are +printed, LLDB will first check if custom printing options have been associated +to a variable's type and, if so, use them instead of picking the default +choices. + +Each of the commands (except ``type category``) has four subcommands available: + +- ``add``: associates a new printing option to one or more types +- ``delete``: deletes an existing association +- ``list``: provides a listing of all associations +- ``clear``: deletes all associations + +Type Format +----------- + +Type formats enable you to quickly override the default format for displaying +primitive types (the usual basic C/C++/ObjC types: int, float, char, ...). + +If for some reason you want all int variables in your program to print out as +hex, you can add a format to the int type. + +This is done by typing + +:: + + (lldb) type format add --format hex int + +at the LLDB command line. + +The ``--format`` (which you can shorten to -f) option accepts a :doc:`format +name`. Then, you provide one or more types to which you want the +new format applied. + +A frequent scenario is that your program has a typedef for a numeric type that +you know represents something that must be printed in a certain way. Again, you +can add a format just to that typedef by using type format add with the name +alias. + +But things can quickly get hierarchical. Let's say you have a situation like +the following: + +:: + + typedef int A; + typedef A B; + typedef B C; + typedef C D; + +and you want to show all A's as hex, all C's as byte arrays and leave the +defaults untouched for other types (albeit its contrived look, the example is +far from unrealistic in large software systems). 
+ +If you simply type + +:: + + (lldb) type format add -f hex A + (lldb) type format add -f uint8_t[] C + +values of type B will be shown as hex and values of type D as byte arrays, as in: + +:: + + (lldb) frame variable -T + (A) a = 0x00000001 + (B) b = 0x00000002 + (C) c = {0x03 0x00 0x00 0x00} + (D) d = {0x04 0x00 0x00 0x00} + +This is because by default LLDB cascades formats through typedef chains. In +order to avoid that you can use the option -C no to prevent cascading, thus +making the two commands required to achieve your goal: + +:: + + (lldb) type format add -C no -f hex A + (lldb) type format add -C no -f uint8_t[] C + + +which provides the desired output: + +:: + + (lldb) frame variable -T + (A) a = 0x00000001 + (B) b = 2 + (C) c = {0x03 0x00 0x00 0x00} + (D) d = 4 + +Two additional options that you will want to look at are --skip-pointers (-p) +and --skip-references (-r). These two options prevent LLDB from applying a +format for type T to values of type T* and T& respectively. + +:: + + (lldb) type format add -f float32[] int + (lldb) frame variable pointer *pointer -T + (int *) pointer = {1.46991e-39 1.4013e-45} + (int) *pointer = {1.53302e-42} + (lldb) type format add -f float32[] int -p + (lldb) frame variable pointer *pointer -T + (int *) pointer = 0x0000000100100180 + (int) *pointer = {1.53302e-42} + +While they can be applied to pointers and references, formats will make no +attempt to dereference the pointer and extract the value before applying the +format, which means you are effectively formatting the address stored in the +pointer rather than the pointee value. For this reason, you may want to use the +-p option when defining formats. + +If you need to delete a custom format simply type type format delete followed +by the name of the type to which the format applies. 
Even if you defined the +same format for multiple types on the same command, type format delete will +only remove the format for the type name passed as argument. 
+ +To delete ALL formats, use ``type format clear``. To see all the formats +defined, use type format list. + +If all you need to do, however, is display one variable in a custom format, +while leaving the others of the same type untouched, you can simply type: + +:: + + (lldb) frame variable counter -f hex + +This has the effect of displaying the value of counter as an hexadecimal +number, and will keep showing it this way until you either pick a different +format or till you let your program run again. + +Finally, this is a list of formatting options available out of which you can +pick: + ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| **Format name** | **Abbreviation** | **Description** | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``default`` | | the default LLDB algorithm is used to pick a format | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``boolean`` | B | show this as a true/false boolean, using the customary rule that 0 is | +| | | false and everything else is true | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``binary`` | b | show this as a sequence of bits | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``bytes`` | y | show the bytes one after the other | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``bytes with ASCII`` | Y | show the bytes, but try to display them as ASCII characters as well | 
++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``character`` | c | show the bytes as ASCII characters | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``printable character`` | C | show the bytes as printable ASCII characters | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``complex float`` | F | interpret this value as the real and imaginary part of a complex | +| | | floating-point number | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``c-string`` | s | show this as a 0-terminated C string | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``decimal`` | i | show this as a signed integer number (this does not perform a cast, it | +| | | simply shows the bytes as an integer with sign) | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``enumeration`` | E | show this as an enumeration, printing the | +| | | value's name if available or the integer value otherwise | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``hex`` | x | show this as in hexadecimal notation (this does | +| | | not perform a cast, it simply shows the bytes as hex) | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``float`` | f | show this as a floating-point number (this does not perform a cast, it | +| | 
| simply interprets the bytes as an IEEE754 floating-point value) | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``octal`` | o | show this in octal notation | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``OSType`` | O | show this as a MacOS OSType | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``unicode16`` | U | show this as UTF-16 characters | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``unicode32`` | | show this as UTF-32 characters | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``unsigned decimal`` | u | show this as an unsigned integer number (this does not perform a cast, | +| | | it simply shows the bytes as unsigned integer) | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``pointer`` | p | show this as a native pointer (unless this is really a pointer, the | +| | | resulting address will probably be invalid) | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``char[]`` | | show this as an array of characters | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``int8_t[], uint8_t[]`` | | show this as an array of the corresponding integer type | +| ``int16_t[], uint16_t[]`` | | | +| ``int32_t[], uint32_t[]`` | | | +| ``int64_t[], uint64_t[]`` | | | +| 
``uint128_t[]`` | | | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``float32[], float64[]`` | | show this as an array of the corresponding | +| | | floating-point type | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``complex integer`` | I | interpret this value as the real and imaginary part of a complex integer | +| | | number | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ +| ``character array`` | a | show this as a character array | ++-----------------------------------------------+------------------+--------------------------------------------------------------------------+ + +Type Summary +------------ + +Type formats work by showing a different kind of display for the value of a +variable. However, they only work for basic types. When you want to display a +class or struct in a custom format, you cannot do that using formats. + +A different feature, type summaries, works by extracting information from +classes, structures, ... (aggregate types) and arranging it in a user-defined +format, as in the following example: + +before adding a summary... + +:: + + (lldb) frame variable -T one + (i_am_cool) one = { + (int) x = 3 + (float) y = 3.14159 + (char) z = 'E' + } + +after adding a summary... + +:: + + (lldb) frame variable one + (i_am_cool) one = int = 3, float = 3.14159, char = 69 + +There are two ways to use type summaries: the first one is to bind a summary +string to the type; the second is to write a Python script that returns the +string to be used as summary. Both options are enabled by the type summary add +command. 
+ +The command to obtain the output shown in the example is: + +:: + + (lldb) type summary add --summary-string "int = ${var.x}, float = ${var.y}, char = ${var.z%u}" i_am_cool + +Initially, we will focus on summary strings, and then describe the Python +binding mechanism. + +Summary Strings +--------------- + +Summary strings are written using a simple control language, exemplified by the +snippet above. A summary string contains a sequence of tokens that are +processed by LLDB to generate the summary. + +Summary strings can contain plain text, control characters and special +variables that have access to information about the current object and the +overall program state. + +Plain text is any sequence of characters that doesn't contain a ``{``, ``}``, ``$``, +or ``\`` character, which are the syntax control characters. + +The special variables are found in between a "${" prefix, and end with a "}" +suffix. Variables can be a simple name or they can refer to complex objects +that have subitems themselves. In other words, a variable looks like +``${object}`` or ``${object.child.otherchild}``. A variable can also be +prefixed or suffixed with other symbols meant to change the way its value is +handled. An example is ``${*var.int_pointer[0-3]}``. + +Basically, the syntax is the same one described in Frame and Thread Formatting +plus additional symbols specific for summary strings. The main one is +``${var``, which is used to refer to the variable that a summary is being created for. + +The simplest thing you can do is grab a member variable of a class or structure +by typing its expression path. In the previous example, the expression path for +the field float y is simply .y. Thus, to ask the summary string to display y +you would type ${var.y}.
+ +If you have code like the following: + +:: + + struct A { + int x; + int y; + }; + struct B { + A x; + A y; + int *z; + }; + +the expression path for the y member of the x member of an object of type B +would be .x.y and you would type ``${var.x.y}`` to display it in a summary +string for type B. + +By default, a summary defined for type T also works for types T* and T& (you +can disable this behavior if desired). For this reason, expression paths do not +differentiate between . and ->, and the above expression path .x.y would be +just as good if you were displaying a B*, or even if the actual definition of B +were: + +:: + + struct B { + A *x; + A y; + int *z; + }; + +This is unlike the behavior of frame variable which, on the contrary, will +enforce the distinction. As hinted above, the rationale for this choice is that +waiving this distinction enables you to write a summary string once for type T +and use it for both T and T* instances. As a summary string is mostly about +extracting nested members' information, a pointer to an object is just as good +as the object itself for the purpose. + +If you need to access the value of the integer pointed to by B::z, you cannot +simply say ${var.z} because that symbol refers to the pointer z. In order to +dereference it and get the pointed-to value, you should say ``${*var.z}``. The +``${*var`` tells LLDB to get the object that the expression path leads to, and +then dereference it. In this example it is equivalent to ``*(bObject.z)`` in +C/C++ syntax. Because . and -> operators can both be used, there is no need to +have dereferences in the middle of an expression path (e.g. you do not need to +type ``${*(var.x).x}``) to read A::x as contained in ``*(B::x)``. To achieve +that effect you can simply write ``${var.x->x}``, or even ``${var.x.x}``. The +``*`` operator only binds to the result of the whole expression path, rather +than piecewise, and there is no way to use parentheses to change that behavior.
+ +Of course, a summary string can contain more than one ${var specifier, and can +use ``${var`` and ``${*var`` specifiers together. + +Formatting Summary Elements +--------------------------- + +An expression path can include formatting codes. Much like the type formats +discussed previously, you can also customize the way variables are displayed in +summary strings, regardless of the format they have applied to their types. To +do that, you can use %format inside an expression path, as in ${var.x->x%u}, +which would display the value of x as an unsigned integer. + +You can also use some other special format markers, not available for formats +themselves, but which carry a special meaning when used in this context: + ++------------+--------------------------------------------------------------------------+ +| **Symbol** | **Description** | ++------------+--------------------------------------------------------------------------+ +| ``Symbol`` | ``Description`` | ++------------+--------------------------------------------------------------------------+ +| ``%S`` | Use this object's summary (the default for aggregate types) | ++------------+--------------------------------------------------------------------------+ +| ``%V`` | Use this object's value (the default for non-aggregate types) | ++------------+--------------------------------------------------------------------------+ +| ``%@`` | Use a language-runtime specific description (for C++ this does nothing, | +| | for Objective-C it calls the NSPrintForDebugger API) | ++------------+--------------------------------------------------------------------------+ +| ``%L`` | Use this object's location (memory address, register name, ...) 
| ++------------+--------------------------------------------------------------------------+ +| ``%#`` | Use the count of the children of this object | ++------------+--------------------------------------------------------------------------+ +| ``%T`` | Use this object's datatype name | ++------------+--------------------------------------------------------------------------+ +| ``%N`` | Print the variable's basename | ++------------+--------------------------------------------------------------------------+ +| ``%>`` | Print the expression path for this item | ++------------+--------------------------------------------------------------------------+ + +Starting with SVN r228207, you can also specify +``${script.var:pythonFuncName}``. Previously, back to r220821, this was +specified with a different syntax: ``${var.script:pythonFuncName}``. + +It is expected that the function name you use specifies a function whose +signature is the same as a Python summary function. The return string from the +function will be placed verbatim in the output. + +You cannot use element access, or formatting symbols, in combination with this +syntax. For example the following: + +:: + + ${script.var.element[0]:myFunctionName%@} + +is not valid and will cause the summary to fail to evaluate. + + +Element Inlining +---------------- + +Option --inline-children (-c) to type summary add tells LLDB not to look for a summary string, but instead to just print a listing of all the object's children on one line. 
+ +As an example, given a type pair: + +:: + + (lldb) frame variable --show-types a_pair + (pair) a_pair = { + (int) first = 1; + (int) second = 2; + } + +If one types the following commands: + +:: + + (lldb) type summary add --inline-children pair + +the output becomes: + +:: + + (lldb) frame variable a_pair + (pair) a_pair = (first=1, second=2) + + +Of course, one can obtain the same effect by typing + +:: + + (lldb) type summary add pair --summary-string "(first=${var.first}, second=${var.second})" + +While the final result is the same, using --inline-children can often save +time. If one does not need to see the names of the variables, but just their +values, the option --omit-names (-O, uppercase letter o), can be combined with +--inline-children to obtain: + +:: + + (lldb) frame variable a_pair + (pair) a_pair = (1, 2) + +which is of course the same as typing + +:: + + (lldb) type summary add pair --summary-string "(${var.first}, ${var.second})" + +Bitfields And Array Syntax +-------------------------- + +Sometimes, a basic type's value actually represents several different values +packed together in a bitfield. + +With the classical view, there is no way to look at them. Hexadecimal display +can help, but if the bits actually span nibble boundaries, the help is limited. + +Binary view would show it all without ambiguity, but is often too detailed and +hard to read for real-life scenarios. + +To cope with the issue, LLDB supports native bitfield formatting in summary +strings. If your expression paths leads to a so-called scalar type (the usual +int, float, char, double, short, long, long long, double, long double and +unsigned variants), you can ask LLDB to only grab some bits out of the value +and display them in any format you like. If you only need one bit you can use +the [n], just like indexing an array. To extract multiple bits, you can use a +slice-like syntax: [n-m], e.g. 
+ +:: + + (lldb) frame variable float_point + (float) float_point = -3.14159 + +:: + + (lldb) type summary add --summary-string "Sign: ${var[31]%B} Exponent: ${var[30-23]%x} Mantissa: ${var[0-22]%u}" float + (lldb) frame variable float_point + (float) float_point = -3.14159 Sign: true Exponent: 0x00000080 Mantissa: 4788184 + +In this example, LLDB shows the internal representation of a float variable by +extracting bitfields out of a float object. + +When typing a range, the extremes n and m are always included, and the order of +the indices is irrelevant. + +LLDB also allows to use a similar syntax to display array members inside a summary string. For instance, you may want to display all arrays of a given type using a more compact notation than the default, and then just delve into individual array members that prove interesting to your debugging task. You can tell LLDB to format arrays in special ways, possibly independent of the way the array members' datatype is formatted. +e.g. + +:: + + (lldb) frame variable sarray + (Simple [3]) sarray = { + [0] = { + x = 1 + y = 2 + z = '\x03' + } + [1] = { + x = 4 + y = 5 + z = '\x06' + } + [2] = { + x = 7 + y = 8 + z = '\t' + } + } + + (lldb) type summary add --summary-string "${var[].x}" "Simple [3]" + + (lldb) frame variable sarray + (Simple [3]) sarray = [1,4,7] + +The [] symbol amounts to: if var is an array and I know its size, apply this summary string to every element of the array. Here, we are asking LLDB to display .x for every element of the array, and in fact this is what happens. 
If you find some of those integers anomalous, you can then inspect that one item in greater detail, without the array format getting in the way: + +:: + + (lldb) frame variable sarray[1] + (Simple) sarray[1] = { + x = 4 + y = 5 + z = '\x06' + } + +You can also ask LLDB to only print a subset of the array range by using the +same syntax used to extract bits for bitfields: + +:: + + (lldb) type summary add --summary-string "${var[1-2].x}" "Simple [3]" + + (lldb) frame variable sarray + (Simple [3]) sarray = [4,7] + +If you are dealing with a pointer that you know is an array, you can use this +syntax to display the elements contained in the pointed array instead of just +the pointer value. However, because pointers have no notion of their size, the +empty brackets [] operator does not work, and you must explicitly provide +higher and lower bounds. + +In general, LLDB needs the square brackets operator [] in order to handle +arrays and pointers correctly, and for pointers it also needs a range. However, +a few special cases are defined to make your life easier: + +You can print a 0-terminated string (C-string) using the %s format, omitting +square brackets, as in: + +:: + + (lldb) type summary add --summary-string "${var%s}" "char *" + +This syntax works for char* as well as for char[] because LLDB can rely on the +final ``\0`` terminator to know when the string has ended. + +LLDB has default summary strings for char* and char[] that use this special +case. On debugger startup, the following are defined automatically: + +:: + + (lldb) type summary add --summary-string "${var%s}" "char *" + (lldb) type summary add --summary-string "${var%s}" -x "char \[[0-9]+]" + +Any of the array formats (int8_t[], float32[], ...), and the y, Y and a formats +work to print an array of a non-aggregate type, even if square brackets are +omitted.
+ +:: + + (lldb) type summary add --summary-string "${var%int32_t[]}" "int [10]" + +This feature, however, is not enabled for pointers because there is no way for +LLDB to detect the end of the pointed data. + +This also does not work for other formats (e.g. boolean), and you must specify +the square brackets operator to get the expected output. + +Python Scripting +---------------- + +Most of the times, summary strings prove good enough for the job of summarizing +the contents of a variable. However, as soon as you need to do more than +picking some values and rearranging them for display, summary strings stop +being an effective tool. This is because summary strings lack the power to +actually perform any kind of computation on the value of variables. + +To solve this issue, you can bind some Python scripting code as a summary for +your datatype, and that script has the ability to both extract children +variables as the summary strings do and to perform active computation on the +extracted values. As a small example, let's say we have a Rectangle class: + +:: + + + class Rectangle + { + private: + int height; + int width; + public: + Rectangle() : height(3), width(5) {} + Rectangle(int H) : height(H), width(H*2-1) {} + Rectangle(int H, int W) : height(H), width(W) {} + int GetHeight() { return height; } + int GetWidth() { return width; } + }; + +Summary strings are effective to reduce the screen real estate used by the +default viewing mode, but are not effective if we want to display the area and +perimeter of Rectangle objects + +To obtain this, we can simply attach a small Python script to the Rectangle +class, as shown in this example: + +:: + + (lldb) type summary add -P Rectangle + Enter your Python command(s). Type 'DONE' to end. 
+ def function (valobj,internal_dict): + height_val = valobj.GetChildMemberWithName('height') + width_val = valobj.GetChildMemberWithName('width') + height = height_val.GetValueAsUnsigned(0) + width = width_val.GetValueAsUnsigned(0) + area = height*width + perimeter = 2*(height + width) + return 'Area: ' + str(area) + ', Perimeter: ' + str(perimeter) + DONE + (lldb) frame variable + (Rectangle) r1 = Area: 20, Perimeter: 18 + (Rectangle) r2 = Area: 72, Perimeter: 36 + (Rectangle) r3 = Area: 16, Perimeter: 16 + +In order to write effective summary scripts, you need to know the LLDB public +API, which is the way Python code can access the LLDB object model. For further +details on the API you should look at the LLDB API reference documentation. + + +As a brief introduction, your script is encapsulated into a function that is +passed two parameters: ``valobj`` and ``internal_dict``. + +``internal_dict`` is an internal support parameter used by LLDB and you should +not touch it. + +``valobj`` is the object encapsulating the actual variable being displayed, and +its type is SBValue. Out of the many possible operations on an SBValue, the +basic one is to retrieve the child objects it contains (essentially, the fields +of the object wrapped by it), by calling ``GetChildMemberWithName()``, passing +it the child's name as a string. + +If the variable has a value, you can ask for it, and return it as a string +using ``GetValue()``, or as a signed/unsigned number using +``GetValueAsSigned()``, ``GetValueAsUnsigned()``. It is also possible to +retrieve an SBData object by calling ``GetData()`` and then read the object's +contents out of the SBData.
+ +If you need to delve into several levels of hierarchy, as you can do with +summary strings, you can use the method ``GetValueForExpressionPath()``, +passing it an expression path just like those you could use for summary strings +(one of the differences is that dereferencing a pointer does not occur by +prefixing the path with a ``*``, but by calling the ``Dereference()`` method +on the returned SBValue). If you need to access array slices, you cannot do +that (yet) via this method call, and you must use ``GetChildAtIndex()`` +querying it for the array items one by one. Also, handling custom formats is +something you have to deal with on your own. + +Other than interactively typing a Python script there are two other ways for +you to input a Python script as a summary: + +- using the --python-script option to type summary add and typing the script + code as an option argument; as in: + +:: + + (lldb) type summary add --python-script "height = valobj.GetChildMemberWithName('height').GetValueAsUnsigned(0);width = valobj.GetChildMemberWithName('width').GetValueAsUnsigned(0); return 'Area: %d' % (height*width)" Rectangle + + +- using the --python-function (-F) option to type summary add and giving the + name of a Python function with the correct prototype. Most probably, you will + define (or have already defined) the function in the interactive interpreter, + or somehow loaded it from a file, using the command script import command. + LLDB will emit a warning if it is unable to find the function you passed, but + will still register the binding. + +Starting in SVN r222593, Python summary formatters can optionally define a +third argument: ``options``. + +This is an object of type ``lldb.SBTypeSummaryOptions`` that can be passed into +the formatter, allowing for a few customizations of the result. The decision to +adopt or not this third argument - and the meaning of options thereof - is +up to the individual formatter's writer.
+ +Regular Expression Typenames +---------------------------- + +As you noticed, in order to associate the custom summary string to the array +types, one must give the array size as part of the typename. This can quickly +become tiresome when using arrays of different sizes, Simple [3], Simple [9], +Simple [12], ... + +If you use the -x option, type names are treated as regular expressions instead +of type names. This would let you rephrase the above example for arrays of type +Simple [3] as: + +:: + + (lldb) type summary add --summary-string "${var[].x}" -x "Simple \[[0-9]+\]" + (lldb) frame variable + (Simple [3]) sarray = [1,4,7] + (Simple [2]) sother = [3,6] + +The above scenario works for Simple [3] as well as for any other array of +Simple objects. + +While this feature is mostly useful for arrays, you could also use regular +expressions to catch other type sets grouped by name. However, as regular +expression matching is slower than normal name matching, LLDB will first try to +match by name in any way it can, and only when this fails, will it resort to +regular expression matching. + +One of the ways LLDB uses this feature internally is to match the names of STL +container classes, regardless of the template arguments provided. The details +for this are found at FormatManager.cpp. + +The regular expression language used by LLDB is the POSIX extended language, as +defined by the Single UNIX Specification, of which Mac OS X is a compliant +implementation. + +Named Summaries +--------------- + +For a given type, there may be different meaningful summary representations. +However, currently, only one summary can be associated to a type at each +moment. If you need to temporarily override the association for a variable, +without changing the summary string for its type, you can use named +summaries. + +Named summaries work by attaching a name to a summary when creating it.
Then, +when there is a need to attach the summary to a variable, the frame variable +command supports a --summary option that tells LLDB to use the named summary +given instead of the default one. + +:: + + (lldb) type summary add --summary-string "x=${var.integer}" --name NamedSummary + (lldb) frame variable one + (i_am_cool) one = int = 3, float = 3.14159, char = 69 + (lldb) frame variable one --summary NamedSummary + (i_am_cool) one = x=3 + +When defining a named summary, binding it to one or more types becomes +optional. Even if you bind the named summary to a type, and later change the +summary string for that type, the named summary will not be changed by that. +You can delete named summaries by using the type summary delete command, as if +the summary name was the datatype that the summary is applied to. + +A summary attached to a variable using the --summary option has the same +semantics that a custom format attached using the -f option has: it stays +attached till you attach a new one, or till you let your program run again. + +Synthetic Children +------------------ + +Summaries work well when one is able to navigate through an expression path. In +order for LLDB to do so, appropriate debugging information must be available. + +Some types are opaque, i.e. no knowledge of their internals is provided. When +that's the case, expression paths do not work correctly. + +In other cases, the internals are available to use in expression paths, but +they do not provide a user-friendly representation of the object's value.
+ +For instance, consider an STL vector, as implemented by the GNU C++ Library: + +:: + + (lldb) frame variable numbers -T + (std::vector) numbers = { + (std::_Vector_base >) std::_Vector_base > = { + (std::_Vector_base >::_Vector_impl) _M_impl = { + (int *) _M_start = 0x00000001001008a0 + (int *) _M_finish = 0x00000001001008a8 + (int *) _M_end_of_storage = 0x00000001001008a8 + } + } + } + +Here, you can see how the type is implemented, and you can write a summary for +that implementation but that is not going to help you infer what items are +actually stored in the vector. + +What you would like to see is probably something like: + +:: + + (lldb) frame variable numbers -T + (std::vector) numbers = { + (int) [0] = 1 + (int) [1] = 12 + (int) [2] = 123 + (int) [3] = 1234 + } + +Synthetic children are a way to get that result. + +The feature is based upon the idea of providing a new set of children for a +variable that replaces the ones available by default through the debug +information. In the example, we can use synthetic children to provide the +vector items as children for the std::vector object. 
+ +In order to create synthetic children, you need to provide a Python class that +adheres to a given interface (the word is italicized because Python has no +explicit notion of interface, by that word we mean a given set of methods must +be implemented by the Python class): + +:: + + class SyntheticChildrenProvider: + def __init__(self, valobj, internal_dict): + this call should initialize the Python object using valobj as the variable to provide synthetic children for + def num_children(self): + this call should return the number of children that you want your object to have + def get_child_index(self,name): + this call should return the index of the synthetic child whose name is given as argument + def get_child_at_index(self,index): + this call should return a new LLDB SBValue object representing the child at the index given as argument + def update(self): + this call should be used to update the internal state of this Python object whenever the state of the variables in LLDB changes.[1] + def has_children(self): + this call should return True if this object might have children, and False if this object can be guaranteed not to have children.[2] + def get_value(self): + this call can return an SBValue to be presented as the value of the synthetic value under consideration.[3] + +[1] This method is optional. Also, it may optionally choose to return a value +(starting with SVN rev153061/LLDB-134). If it returns a value, and that value +is True, LLDB will be allowed to cache the children and the children count it +previously obtained, and will not return to the provider class to ask. If +nothing, None, or anything other than True is returned, LLDB will discard the +cached information and ask. Regardless, whenever necessary LLDB will call +update. + +[2] This method is optional (starting with SVN rev166495/LLDB-175). 
While +implementing it in terms of num_children is acceptable, implementors are +encouraged to look for optimized coding alternatives whenever reasonable. + +[3] This method is optional (starting with SVN revision 219330). The SBValue +you return here will most likely be a numeric type (int, float, ...) as its +value bytes will be used as-if they were the value of the root SBValue proper. +As a shortcut for this, you can inherit from lldb.SBSyntheticValueProvider, and +just define get_value as other methods are defaulted in the superclass as +returning default no-children responses. + +If a synthetic child provider supplies a special child named $$dereference$$ +then it will be used when evaluating operator* and operator-> in the frame +variable command and related SB API functions. + +For examples of how synthetic children are created, you are encouraged to look +at examples/synthetic in the LLDB trunk. Please, be aware that the code in +those files (except bitfield/) is legacy code and is not maintained. You may +especially want to begin looking at this example to get a feel for this +feature, as it is a very easy and well-commented example. + +The design pattern consistently used in synthetic providers shipping with LLDB +is to use the ``__init__`` to store the SBValue instance as a part of self. The +update function is then used to perform the actual initialization. Once a +synthetic children provider is written, one must load it into LLDB before it +can be used. Currently, one can use the LLDB script command to type Python code +interactively, or use the command script import fileName command to load Python +code from a Python module (ordinary rules apply to importing modules this way). +A third option is to type the code for the provider class interactively while +adding it. + +For example, let's pretend we have a class Foo for which a synthetic children +provider class Foo_Provider is available, in a Python module contained in file +~/Foo_Tools.py.
The following interaction sets Foo_Provider as a synthetic +children provider in LLDB: + +:: + + (lldb) command script import ~/Foo_Tools.py + (lldb) type synthetic add Foo --python-class Foo_Tools.Foo_Provider + (lldb) frame variable a_foo + (Foo) a_foo = { + x = 1 + y = "Hello world" + } + +LLDB has synthetic children providers for a core subset of STL classes, both in +the version provided by libstdcpp and by libcxx, as well as for several +Foundation classes. + +Synthetic children extend summary strings by enabling a new special variable: +``${svar``. + +This symbol tells LLDB to refer expression paths to the synthetic children +instead of the real ones. For instance, + +:: + + (lldb) type summary add --expand -x "std::vector<" --summary-string "${svar%#} items" + (lldb) frame variable numbers + (std::vector) numbers = 4 items { + (int) [0] = 1 + (int) [1] = 12 + (int) [2] = 123 + (int) [3] = 1234 + } + +In some cases, if LLDB is unable to use the real object to get a child +specified in an expression path, it will automatically refer to the synthetic +children. While in summaries it is best to always use ${svar to make your +intentions clearer, interactive debugging can benefit from this behavior, as +in: + +:: + + (lldb) frame variable numbers[0] numbers[1] + (int) numbers[0] = 1 + (int) numbers[1] = 12 + +Unlike many other visualization features, however, the access to synthetic +children only works when using frame variable, and is not supported in +expression: + +:: + + (lldb) expression numbers[0] + Error [IRForTarget]: Call to a function '_ZNSt33vector >ixEm' that is not present in the target + error: Couldn't convert the expression to DWARF + +The reason for this is that classes might have an overloaded operator [], or +other special provisions and the expression command chooses to ignore synthetic +children in the interest of equivalency with code you asked to have compiled +from source. 
+ +Filters +------- + +Filters are a solution to the display of complex classes. At times, classes +have many member variables but not all of these are actually necessary for the +user to see. + +A filter will solve this issue by only letting the user see those member +variables they care about. Of course, the equivalent of a filter can be +implemented easily using synthetic children, but a filter lets you get the job +done without having to write Python code. + +For instance, if your class Foobar has member variables named A through Z, but you +only need to see the ones named B, H and Q, you can define a filter: + +:: + + (lldb) type filter add Foobar --child B --child H --child Q + (lldb) frame variable a_foobar + (Foobar) a_foobar = { + (int) B = 1 + (char) H = 'H' + (std::string) Q = "Hello world" + } + +Objective-C Dynamic Type Discovery +---------------------------------- + +When doing Objective-C development, you may notice that some of your variables +come out as of type id (for instance, items extracted from NSArray). By +default, LLDB will not show you the real type of the object. It can actually +dynamically discover the type of an Objective-C variable, much like the runtime +itself does when invoking a selector. In order to be shown the result of that +discovery, however, a special option to frame variable or expression is +required: ``--dynamic-type``. + + +``--dynamic-type`` can have one of three values: + +- ``no-dynamic-values``: the default, prevents dynamic type discovery +- ``no-run-target``: enables dynamic type discovery as long as running code on + the target is not required +- ``run-target``: enables code execution on the target in order to perform + dynamic type discovery + +If you specify a value of either no-run-target or run-target, LLDB will detect +the dynamic type of your variables and show the appropriate formatters for +them.
As an example: + +:: + + (lldb) expr @"Hello" + (NSString *) $0 = 0x00000001048000b0 @"Hello" + (lldb) expr -d no-run @"Hello" + (__NSCFString *) $1 = 0x00000001048000b0 @"Hello" + +Because LLDB uses a detection algorithm that does not need to invoke any +functions on the target process, no-run-target is enough for this to work. + +As a side note, the summary for NSString shown in the example is built right +into LLDB. It was initially implemented through Python (the code is still +available for reference at CFString.py). However, that Python code is out of sync with the +current implementation of the NSString formatter (which is a C++ function +compiled into the LLDB core). + +Categories +---------- + +Categories are a way to group related formatters. For instance, LLDB itself +groups the formatters for the libstdc++ types in a category named +gnu-libstdc++. Basically, categories act like containers in which to store +formatters for the same library or OS release. + +By default, several categories are created in LLDB: + +- default: this is the category where every formatter ends up, unless another category is specified +- objc: formatters for basic and common Objective-C types that do not specifically depend on Mac OS X +- gnu-libstdc++: formatters for std::string, std::vector, std::list and std::map as implemented by libstdcpp +- libcxx: formatters for std::string, std::vector, std::list and std::map as implemented by libcxx +- system: truly basic types for which a formatter is required +- AppKit: Cocoa classes +- CoreFoundation: CF classes +- CoreGraphics: CG classes +- CoreServices: CS classes +- VectorTypes: compact display for several vector types + +If you want to use a custom category for your formatters, all the type ... add +commands provide a --category (-w) option that names the category to add the formatter +to. To delete the formatter, you then have to specify the correct category. + +Categories can be in one of two states: enabled and disabled.
A category is +initially disabled, and can be enabled using the type category enable command. +To disable an enabled category, the command to use is type category disable. + +The order in which categories are enabled or disabled is significant, in that +LLDB uses that order when looking for formatters. Therefore, when you enable a +category, it becomes the second one to be searched (after default, which always +stays on top of the list). The default categories are enabled in such a way +that the search order is: + +- default +- objc +- CoreFoundation +- AppKit +- CoreServices +- CoreGraphics +- gnu-libstdc++ +- libcxx +- VectorTypes +- system + +As said, gnu-libstdc++ and libcxx contain formatters for C++ STL data types. +system contains formatters for char* and char[], which reflect the behavior of +older versions of LLDB which had built-in formatters for these types. Because +these are now formatters, you can even replace them with your own if you so +wish. + +There is no special command to create a category. When you place a formatter in +a category, if that category does not exist, it is automatically created. For +instance, + +:: + + (lldb) type summary add Foobar --summary-string "a foobar" --category newcategory + +automatically creates a (disabled) category named newcategory. + +Another way to create a new (empty) category is to enable it, as in: + +:: + + (lldb) type category enable newcategory + +However, in this case LLDB warns you that enabling an empty category has no +effect. If you add formatters to the category after enabling it, they will be +honored. But an empty category per se does not change the way any type is +displayed. The reason the debugger warns you is that enabling an empty category +might be a typo, and you actually wanted to enable a similarly-named but +non-empty category.
+ +Finding Formatters 101 +---------------------- + +Searching for a formatter (including formats, starting in SVN rev r192217) +given a variable goes through a rather intricate set of rules. Namely, what +happens is that LLDB starts looking in each enabled category, according to the +order in which they were enabled (latest enabled first). In each category, LLDB +does the following: + +- If there is a formatter for the type of the variable, use it +- If this object is a pointer, and there is a formatter for the pointee type + that does not skip pointers, use it +- If this object is a reference, and there is a formatter for the referred type + that does not skip references, use it +- If this object is an Objective-C class and dynamic types are enabled, look + for a formatter for the dynamic type of the object. If dynamic types are + disabled, or the search failed, look for a formatter for the declared type of + the object +- If this object's type is a typedef, go through typedef hierarchy (LLDB might + not be able to do this if the compiler has not emitted enough information. If + the required information to traverse typedef hierarchies is missing, type + cascading will not work. The clang compiler, part of the LLVM project, emits + the correct debugging information for LLDB to cascade). If at any level of + the hierarchy there is a valid formatter that can cascade, use it. +- If everything has failed, repeat the above search, looking for regular + expressions instead of exact matches + +If any of those attempts returned a valid formatter to be used, that one is +used, and the search is terminated (without going to look in other categories). +If nothing was found in the current category, the next enabled category is +scanned according to the same algorithm. If there are no more enabled +categories, the search has failed. + +**Warning**: previous versions of LLDB defined cascading to mean not only going +through typedef chains, but also through inheritance chains. 
This feature has +been removed since it significantly degrades performance. You need to set up +your formatters for every type in inheritance chains to which you want the +formatter to apply.