diff --git a/compiler-rt/lib/hwasan/CMakeLists.txt b/compiler-rt/lib/hwasan/CMakeLists.txt
--- a/compiler-rt/lib/hwasan/CMakeLists.txt
+++ b/compiler-rt/lib/hwasan/CMakeLists.txt
@@ -181,6 +181,8 @@
 
 add_compiler_rt_resource_file(hwasan_blacklist hwasan_blacklist.txt hwasan)
 
+add_subdirectory("scripts")
+
 # if(COMPILER_RT_INCLUDE_TESTS)
 # add_subdirectory(tests)
 # endif()
diff --git a/compiler-rt/lib/hwasan/scripts/CMakeLists.txt b/compiler-rt/lib/hwasan/scripts/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/hwasan/scripts/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_compiler_rt_script(hwasan_symbolize)
+add_dependencies(hwasan hwasan_symbolize)
diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
new file mode 100755
--- /dev/null
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+#
+# HWAddressSanitizer offline symbolization script.
+#
+#===------------------------------------------------------------------------===#
+import glob
+import os
+import re
+import sys
+import string
+import subprocess
+import argparse
+
+# Address and pointer tag of the most recent tag-mismatch report seen on stdin.
+# Written by save_access_address(), read by process_stack_history().
+last_access_address = None
+last_access_tag = None
+
+class Symbolizer:
+  """Drives an llvm-symbolizer subprocess that is started on first use."""
+
+  def __init__(self, path, binary_prefixes, paths_to_cut):
+    # path: llvm-symbolizer executable to launch.
+    # binary_prefixes: directories searched for the binaries named in reports.
+    # paths_to_cut: regex fragments; each match and everything before it is
+    #   removed from reported source paths.
+    self.__pipe = None
+    self.__path = path
+    self.__binary_prefixes = binary_prefixes
+    self.__paths_to_cut = paths_to_cut
+    self.__log = False
+
+  def enable_logging(self, enable):
+    """Turn echoing of the symbolizer conversation to stderr on or off."""
+    self.__log = enable
+
+  def __open_pipe(self):
+    # Start the symbolizer only once; later calls reuse the same process.
+    if not self.__pipe:
+      self.__pipe = subprocess.Popen([self.__path, "-inlining", "-functions"],
+                                     stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+  class __EOF:
+    """Raised by __read() when the symbolizer returns an empty line."""
+    pass
+
+  def __write(self, s):
+    # Send one request line to the symbolizer.
+    print >>self.__pipe.stdin, s
+    if self.__log:
+      print >>sys.stderr, ("#>> |%s|" % (s,))
+
+  def __read(self):
+    # Read one response line; an empty line is reported as __EOF.
+    s = self.__pipe.stdout.readline().rstrip()
+    if self.__log:
+      print >>sys.stderr, ("# << |%s|" % (s,))
+    if s == '':
+      raise Symbolizer.__EOF
+    return s
+
+  def __process_source_path(self, file_name):
+    # Strip the configured prefixes, then collapse sanitizer runtime and
+    # crtstuff locations into short placeholders.
+    for path_to_cut in self.__paths_to_cut:
+      file_name = re.sub(".*" + path_to_cut, "", file_name)
+    file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
+    file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
+    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
+    return file_name
+
+  def __process_binary_name(self, name):
+    # Map a binary path from the report to a file under one of the symbol
+    # directories. Returns None (after a warning on stderr) if none exists.
+    if name.startswith('/'):
+      name = name[1:]
+    for p in self.__binary_prefixes:
+      full_path = os.path.join(p, name)
+      if os.path.exists(full_path):
+        return full_path
+    # Try stripping extra path components as the last resort.
+    for p in self.__binary_prefixes:
+      full_path = os.path.join(p, os.path.basename(name))
+      if os.path.exists(full_path):
+        return full_path
+    print >>sys.stderr, "Could not find symbols for", name
+    return None
+
+  def iter_locals(self, binary, addr):
+    """Yield the local variables reported for addr in binary.
+
+    Each item is (function_name, file_line, local_name, offset, size,
+    tag_offset); a numeric field is None when the symbolizer prints '??'.
+    Yields nothing if no file with symbols is found for binary.
+    """
+    self.__open_pipe()
+    binary = self.__process_binary_name(binary)
+    if not binary:
+      return
+    self.__write("FRAME %s %s" % (binary, addr))
+    try:
+      while True:
+        function_name = self.__read()
+        local_name = self.__read()
+        file_line = self.__read()
+        extra = self.__read().split()
+
+        file_line = self.__process_source_path(file_line)
+        offset = None if extra[0] == '??' else int(extra[0])
+        size = None if extra[1] == '??' else int(extra[1])
+        tag_offset = None if extra[2] == '??' else int(extra[2])
+        yield (function_name, file_line, local_name, offset, size, tag_offset)
+    except Symbolizer.__EOF:
+      pass
+
+  def iter_call_stack(self, binary, addr):
+    """Yield (function_name, file_line) pairs reported for addr in binary.
+
+    The symbolizer runs with -inlining, so one address may produce several
+    pairs. Yields nothing if no file with symbols is found for binary.
+    """
+    self.__open_pipe()
+    binary = self.__process_binary_name(binary)
+    if not binary:
+      return
+    self.__write("CODE %s %s" % (binary, addr))
+    try:
+      while True:
+        function_name = self.__read()
+        file_line = self.__read()
+        file_line = self.__process_source_path(file_line)
+        yield (function_name, file_line)
+    except Symbolizer.__EOF:
+      pass
+
+def symbolize_line(line, symbolizer):
+  """Print line, expanding a stack-frame line into symbolized frame(s).
+
+  Lines that are not stack frames, or whose frame cannot be symbolized, are
+  printed unchanged apart from stripped trailing whitespace.
+  """
+  #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
+  match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
+  if match:
+    binary = match.group(5)
+    addr = int(match.group(6), 16)
+
+    frames = list(symbolizer.iter_call_stack(binary, addr))
+
+    if len(frames) > 0:
+      print "%s#%s%s%s in %s" % (match.group(1).encode('utf-8'), match.group(2).encode('utf-8'),
+                                 match.group(3).encode('utf-8'), frames[0][0], frames[0][1])
+      for i in range(1, len(frames)):
+        space1 = ' ' * match.end(1)
+        space2 = ' ' * (match.start(4) - match.end(1) - 2)
+        print "%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1])
+    else:
+      print line.rstrip().encode('utf-8')
+  else:
+    print line.rstrip().encode('utf-8')
+
+def save_access_address(line):
+  """Record the faulting address and pointer tag if line carries them."""
+  global last_access_address, last_access_tag
+  match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
+  if match:
+    last_access_address = int(match.group(2), 16)
+  match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]+ tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
+  if match:
+    last_access_tag = int(match.group(2), 16)
+
+def process_stack_history(line, symbolizer, ignore_tags=False):
+  """Handle one line of the "Previously allocated frames" section.
+
+  Returns True if the line was consumed and must not be echoed, else False.
+  For a stack history record, prints every local whose frame slot covers the
+  last reported access address (and whose tag matches unless ignore_tags).
+  """
+  if last_access_address is None or last_access_tag is None:
+    return False
+  if re.match(r'Previously allocated frames:', line, re.UNICODE):
+    return True
+  fp_mask = (1 << 20) - 1
+  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
+  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
+  if match:
+    record_addr = int(match.group(2), 16)
+    record = int(match.group(3), 16)
+    binary = match.group(4)
+    addr = int(match.group(5), 16)
+    base_tag = (record_addr >> 3) & 0xFF
+    # Bits 48 and up of the record hold the frame pointer in 16-byte units;
+    # the low 48 bits (the PC) are not needed here.
+    fp = (record >> 48) << 4
+
+    for local in symbolizer.iter_locals(binary, addr):
+      frame_offset = local[3]
+      size = local[4]
+      if frame_offset is None or size is None:
+        continue
+      # Offset of the access within the variable, taken modulo 2**20.
+      obj_offset = (last_access_address - fp - frame_offset) & fp_mask
+      if obj_offset >= size:
+        continue
+      tag_offset = local[5]
+      if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
+        continue
+      print ''
+      print 'Potentially referenced stack object:'
+      print ' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0])
+      print ' at %s' % (local[1],)
+    return True
+  return False
+
+# -d echoes the llvm-symbolizer conversation to stderr; -v prints the resolved
+# configuration before processing input.
+parser = argparse.ArgumentParser()
+parser.add_argument('-d', action='store_true')
+parser.add_argument('-v', action='store_true')
+parser.add_argument('--ignore-tags', action='store_true')
+parser.add_argument('--symbols', action='append')
+parser.add_argument('--source', action='append')
+parser.add_argument('--symbolizer')
+parser.add_argument('args', nargs=argparse.REMAINDER)
+args = parser.parse_args()
+
+# Unstripped binaries location.
+binary_prefixes = args.symbols or []
+if not binary_prefixes:
+  if 'ANDROID_PRODUCT_OUT' in os.environ:
+    product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
+    binary_prefixes.append(product_out)
+
+for p in binary_prefixes:
+  if not os.path.isdir(p):
+    print >>sys.stderr, "Symbols path does not exist or is not a directory:", p
+    sys.exit(1)
+
+# Source location.
+paths_to_cut = args.source or []
+if not paths_to_cut:
+  paths_to_cut.append(os.getcwd() + '/')
+  if 'ANDROID_BUILD_TOP' in os.environ:
+    paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
+
+# llvm-symbolizer binary.
+# 1. --symbolizer flag
+# 2. environment variable
+# 3. unsuffixed binary in the directory containing this script
+# 4. if inside Android platform, prebuilt binary at a known path
+# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
+#    highest available version in $PATH
+symbolizer_path = args.symbolizer
+if not symbolizer_path:
+  if 'LLVM_SYMBOLIZER_PATH' in os.environ:
+    symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
+  elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
+    symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
+
+if not symbolizer_path:
+  s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
+  if os.path.exists(s):
+    symbolizer_path = s
+
+if not symbolizer_path:
+  if 'ANDROID_BUILD_TOP' in os.environ:
+    s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
+    if os.path.exists(s):
+      symbolizer_path = s
+
+if not symbolizer_path:
+  for path in os.environ["PATH"].split(os.pathsep):
+    p = os.path.join(path, 'llvm-symbolizer')
+    if os.path.exists(p):
+      symbolizer_path = p
+      break
+
+def extract_version(s):
+  """Return the number after the last '-' in s as a float, or 0 if unusable."""
+  idx = s.rfind('-')
+  if idx == -1:
+    return 0
+  try:
+    return float(s[idx + 1:])
+  except ValueError:
+    # Suffix is not a plain number (e.g. "-10.0.0"); sort it as version 0.
+    return 0
+
+if not symbolizer_path:
+  for path in os.environ["PATH"].split(os.pathsep):
+    candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
+    if len(candidates) > 0:
+      candidates.sort(key = extract_version, reverse = True)
+      symbolizer_path = candidates[0]
+      break
+
+# Nothing was found anywhere; os.path.exists(None) raises TypeError on Python 2.
+if not symbolizer_path:
+  print >>sys.stderr, "Could not find llvm-symbolizer; pass --symbolizer or set LLVM_SYMBOLIZER_PATH."
+  sys.exit(1)
+
+if not os.path.exists(symbolizer_path):
+  print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path
+  sys.exit(1)
+
+if args.v:
+  print "Looking for symbols in:"
+  for s in binary_prefixes:
+    print " %s" % (s,)
+  print "Stripping source path prefixes:"
+  for s in paths_to_cut:
+    print " %s" % (s,)
+  print "Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,)
+  print
+
+symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
+symbolizer.enable_logging(args.d)
+
+for line in sys.stdin:
+  line = line.decode('utf-8')
+  save_access_address(line)
+  if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
+    continue
+  symbolize_line(line, symbolizer)