This is an archive of the discontinued LLVM Phabricator instance.

[lldb] Parallelize fetching symbol files in crashlog.py
ClosedPublic

Authored by JDevlieghere on May 6 2022, 10:57 AM.

Download Raw Diff

Details

Reviewers

mib
clayborg

Summary

When using dsymForUUID, the majority of the time spent symbolicating a crashlog with crashlog.py goes to waiting for dsymForUUID to complete. Currently, we're calling dsymForUUID sequentially when iterating over the modules. We can drastically cut down this time by calling dsymForUUID in parallel. This patch uses Python's ThreadPoolExecutor (introduced in Python 3.2) to parallelize this IO-bound operation.

The performance improvement is hard to benchmark, because even with an empty local cache, consecutive calls to dsymForUUID for the same UUID complete faster. With warm caches, I'm seeing a ~30% performance improvement (~90s -> ~60s). I suspect the gains will be much bigger for a cold cache.

Diff Detail

Event Timeline

JDevlieghere created this revision.May 6 2022, 10:57 AM

Herald added a project: Restricted Project. · View Herald TranscriptMay 6 2022, 10:57 AM

Herald added a subscriber: kristof.beyls. · View Herald Transcript

JDevlieghere requested review of this revision.May 6 2022, 10:57 AM

JDevlieghere edited the summary of this revision. (Show Details)

JDevlieghere added inline comments.May 6 2022, 11:00 AM

lldb/examples/python/crashlog.py
272	The changes to this line and the one below are the result of "Getting symbols [...]" and "Resolved symbols [...]" no longer appearing after each other. By parallelizing this operation you get a bunch of consecutive "Getting symbols [...]" followed by a bunch of "Resolved symbols [...]" lines.

Harbormaster completed remote builds in B163180: Diff 427678.May 6 2022, 11:00 AM

Very cool! LGTM!

This revision is now accepted and ready to land.May 6 2022, 11:04 AM

lgtm!

JDevlieghere mentioned this in rGa8abb695859a: [lldb] Parallelize fetching symbol files in crashlog.py.May 13 2022, 12:25 PM

a8abb695859ad4e7fe695b9ee238a2b0cd00af7c

Revision Contents

Path

Size

lldb/

examples/

python/

crashlog.py

20 lines

Diff 427678

lldb/examples/python/crashlog.py

Show All 20 Lines
#		#
# On MacOSX csh, tcsh:		# On MacOSX csh, tcsh:
# ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )		# ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#		#
# On MacOSX sh, bash:		# On MacOSX sh, bash:
# PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash		# PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------		#----------------------------------------------------------------------

from __future__ import print_function
import cmd		import cmd
import contextlib		import contextlib
import datetime		import datetime
import glob		import glob
import json		import json
import optparse		import optparse
import os		import os
import platform		import platform
import plistlib		import plistlib
import re		import re
import shlex		import shlex
import string		import string
import subprocess		import subprocess
import sys		import sys
import time		import time
import uuid		import uuid
		import concurrent.futures

try:		try:
# First try for LLDB in case PYTHONPATH is already correctly setup.		# First try for LLDB in case PYTHONPATH is already correctly setup.
import lldb		import lldb
except ImportError:		except ImportError:
# Ask the command line driver for the path to the lldb module. Copy over		# Ask the command line driver for the path to the lldb module. Copy over
# the environment so that SDKROOT is propagated to xcrun.		# the environment so that SDKROOT is propagated to xcrun.
command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']		command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines	class DarwinImage(symbolication.Image):
def locate_module_and_debug_symbols(self):		def locate_module_and_debug_symbols(self):
# Don't load a module twice...		# Don't load a module twice...
if self.resolved:		if self.resolved:
return True		return True
# Mark this as resolved so we don't keep trying		# Mark this as resolved so we don't keep trying
self.resolved = True		self.resolved = True
uuid_str = self.get_normalized_uuid_string()		uuid_str = self.get_normalized_uuid_string()
if self.show_symbol_progress():		if self.show_symbol_progress():
print('Getting symbols for %s %s...\n' % (uuid_str, self.path), end=' ')		print('Getting symbols for %s %s...' % (uuid_str, self.path))
		JDevlieghereAuthorUnsubmitted Done Reply Inline Actions The changes to this line and the one below are the result of "Getting symbols [...]" and "Resolved symbols [...]" no longer appearing after each other. By parallelizing this operation you get a bunch of consecutive "Getting symbols [...]" followed by a bunch of "Resolved symbols [...]" lines. JDevlieghere: The changes to this line and the one below are the result of "Getting symbols [...]" and…
if os.path.exists(self.dsymForUUIDBinary):		if os.path.exists(self.dsymForUUIDBinary):
dsym_for_uuid_command = '%s %s' % (		dsym_for_uuid_command = '%s %s' % (
self.dsymForUUIDBinary, uuid_str)		self.dsymForUUIDBinary, uuid_str)
s = subprocess.check_output(dsym_for_uuid_command, shell=True)		s = subprocess.check_output(dsym_for_uuid_command, shell=True)
if s:		if s:
try:		try:
plist_root = read_plist(s)		plist_root = read_plist(s)
except:		except:
Show All 33 Lines	class DarwinImage(symbolication.Image):
found_matching_slice = True		found_matching_slice = True
break		break
if found_matching_slice:		if found_matching_slice:
break		break
except:		except:
pass		pass
if (self.resolved_path and os.path.exists(self.resolved_path)) or (		if (self.resolved_path and os.path.exists(self.resolved_path)) or (
self.path and os.path.exists(self.path)):		self.path and os.path.exists(self.path)):
print('Resolved symbols for %s %s...\n' % (uuid_str, self.path), end=' ')		print('Resolved symbols for %s %s...' % (uuid_str, self.path))
return True		return True
else:		else:
self.unavailable = True		self.unavailable = True
return False		return False

def __init__(self, debugger, path, verbose):		def __init__(self, debugger, path, verbose):
"""CrashLog constructor that take a path to a darwin crash log file"""		"""CrashLog constructor that take a path to a darwin crash log file"""
symbolication.Symbolicator.__init__(self, debugger)		symbolication.Symbolicator.__init__(self, debugger)
▲ Show 20 Lines • Show All 578 Lines • ▼ Show 20 Lines	class Symbolicate:

def get_short_help(self):		def get_short_help(self):
return "Symbolicate one or more darwin crash log files."		return "Symbolicate one or more darwin crash log files."

def get_long_help(self):		def get_long_help(self):
option_parser = CrashLogOptionParser()		option_parser = CrashLogOptionParser()
return option_parser.format_help()		return option_parser.format_help()


def SymbolicateCrashLog(crash_log, options):		def SymbolicateCrashLog(crash_log, options):
if options.debug:		if options.debug:
crash_log.dump()		crash_log.dump()
if not crash_log.images:		if not crash_log.images:
print('error: no images in crash log')		print('error: no images in crash log')
return		return

if options.dump_image_list:		if options.dump_image_list:
Show All 30 Lines	else:
for ident in crash_log.idents:		for ident in crash_log.idents:
images = crash_log.find_images_with_identifier(ident)		images = crash_log.find_images_with_identifier(ident)
if images:		if images:
for image in images:		for image in images:
images_to_load.append(image)		images_to_load.append(image)
else:		else:
print('error: can\'t find image for identifier "%s"' % ident)		print('error: can\'t find image for identifier "%s"' % ident)

		futures = []
		with concurrent.futures.ThreadPoolExecutor() as executor:
		def add_module(image, target):
		return image, image.add_module(target)

for image in images_to_load:		for image in images_to_load:
if image not in loaded_images:		futures.append(executor.submit(add_module, image=image, target=target))
err = image.add_module(target)
		for future in concurrent.futures.as_completed(futures):
		image, err = future.result()
if err:		if err:
print(err)		print(err)
else:		else:
loaded_images.append(image)		loaded_images.append(image)

if crash_log.backtraces:		if crash_log.backtraces:
for thread in crash_log.backtraces:		for thread in crash_log.backtraces:
thread.dump_symbolicated(crash_log, options)		thread.dump_symbolicated(crash_log, options)
▲ Show 20 Lines • Show All 255 Lines • Show Last 20 Lines