Skip to content

Commit 3c9f66d

Browse files
committedJun 18, 2019
[asan_symbolize] Teach asan_symbolize.py to symbolicate partially symbolicated ASan reports.
Summary: The use case here is to be able to symbolicate ASan reports that might be partially symbolicated, in particular where the function name is known but no source location is available. This can be caused by missing debug info. Previously we would only try to symbolicate completely unsymbolicated reports. The code currently contains an unfortunate quirk to handle a Darwin-specific bug (rdar://problem/49784442) in the way partially symbolicated reports are emitted when the source location is missing. rdar://problem/49476995 Reviewers: kubamracek, yln, samsonov, dvyukov, vitalybuka Subscribers: aprantl, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D60533 llvm-svn: 363639
1 parent 7747700 commit 3c9f66d

File tree

2 files changed

+69
-2
lines changed

2 files changed

+69
-2
lines changed
 

‎compiler-rt/lib/asan/scripts/asan_symbolize.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
383383
self.dsym_hints = set([])
384384
self.frame_no = 0
385385
self.process_line = self.process_line_posix
386+
self.using_module_map = plugin_proxy.has_plugin(ModuleMapPlugIn.get_name())
386387

387388
def symbolize_address(self, addr, binary, offset, arch):
388389
# On non-Darwin (i.e. on platforms without .dSYM debug info) always use
@@ -451,14 +452,26 @@ def process_line_echo(self, line):
451452

452453
def process_line_posix(self, line):
453454
self.current_line = line.rstrip()
454-
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
455+
# Unsymbolicated:
456+
# #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
457+
# Partially symbolicated:
458+
# #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
459+
# NOTE: We have to be very liberal with symbol
460+
# names in the regex because it could be an
461+
# Objective-C or C++ demangled name.
455462
stack_trace_line_format = (
456-
'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
463+
'^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)')
457464
match = re.match(stack_trace_line_format, line)
458465
if not match:
459466
return [self.current_line]
460467
logging.debug(line)
461468
_, frameno_str, addr, binary, offset = match.groups()
469+
if not self.using_module_map and not os.path.isabs(binary):
470+
# Do not try to symbolicate if the binary is just the module file name
471+
# and a module map is unavailable.
472+
# FIXME(dliew): This is currently necessary for reports on Darwin that are
473+
# partially symbolicated by `atos`.
474+
return [self.current_line]
462475
arch = ""
463476
# Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
464477
colon_pos = binary.rfind(":")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// When `external_symbolizer_path` is empty on Darwin we fall back on using
2+
// dladdr as the symbolizer which means we get the symbol name
3+
// but no source location. The current implementation also doesn't try to
4+
// change the module name so we end up with the full name so we actually don't
5+
// need the module map here.
6+
7+
// RUN: %clangxx_asan -O0 -g %s -o %t.executable
8+
// RUN: %env_asan_opts=symbolize=1,print_module_map=0,external_symbolizer_path= not %run %t.executable > %t2.log 2>&1
9+
// RUN: FileCheck -input-file=%t2.log -check-prefix=CHECK-PS %s
10+
// RUN: %asan_symbolize --force-system-symbolizer < %t2.log > %t2.fully_symbolized
11+
// RUN: FileCheck -input-file=%t2.fully_symbolized -check-prefix=CHECK-FS %s
12+
13+
// Due to a quirk in the way atos reports module names we have to use the module
14+
// map here, otherwise we don't know what the full path to the module is.
15+
16+
// FIXME(dliew): We currently have to use module map for this test due to the atos
17+
// symbolizer changing the module name from an absolute path to just the file name.
18+
// rdar://problem/49784442
19+
//
20+
// Simulate partial symbolication (can happen with %L specifier) by printing
21+
// out %L's fallback which will print the module name and offset instead of a
22+
// source location.
23+
// RUN: %clangxx_asan -O0 -g %s -o %t2.executable
24+
// RUN: %env_asan_opts=symbolize=1,print_module_map=1,stack_trace_format='" #%%n %%p %%F %%M"' not %run %t.executable > %t2.log 2>&1
25+
// RUN: FileCheck -input-file=%t2.log -check-prefix=CHECK-PS %s
26+
// Now try to fully symbolicate using the module map.
27+
// RUN: %asan_symbolize --module-map %t2.log --force-system-symbolizer < %t2.log > %t2.fully_symbolized
28+
// RUN: FileCheck -input-file=%t2.fully_symbolized -check-prefix=CHECK-FS %s
29+
30+
#include <cstdlib>
31+
32+
// Partially symbolicated back-trace where symbol is available but
33+
// source location is not and instead module name and offset are
34+
// printed.
35+
// CHECK-PS: WRITE of size 4
36+
// CHECK-PS: #0 0x{{.+}} in foo ({{.+}}.executable:{{.+}}+0x{{.+}})
37+
// CHECK-PS: #1 0x{{.+}} in main ({{.+}}.executable:{{.+}}+0x{{.+}})
38+
39+
// CHECK-FS: WRITE of size 4
40+
41+
extern "C" void foo(int* a) {
42+
// CHECK-FS: #0 0x{{.+}} in foo {{.*}}asan-symbolize-partial-report-with-module-map.cc:[[@LINE+1]]
43+
*a = 5;
44+
}
45+
46+
int main() {
47+
int* a = (int*) malloc(sizeof(int));
48+
if (!a)
49+
return 0;
50+
free(a);
51+
// CHECK-FS: #1 0x{{.+}} in main {{.*}}asan-symbolize-partial-report-with-module-map.cc:[[@LINE+1]]
52+
foo(a);
53+
return 0;
54+
}

0 commit comments

Comments
 (0)
Please sign in to comment.