Index: lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc =================================================================== --- /dev/null +++ lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc @@ -0,0 +1,74 @@ +//===-- sanitizer_symbolize.cc ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of weak hooks from sanitizer_symbolizer_posix_libcdep.cc. +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "llvm/DebugInfo/Symbolize/DIPrinter.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" + +static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() { + static llvm::symbolize::LLVMSymbolizer *DefaultSymbolizer = 0; + if (DefaultSymbolizer == 0) { + DefaultSymbolizer = new llvm::symbolize::LLVMSymbolizer(); + } + return DefaultSymbolizer; +} + +namespace __sanitizer { +int internal_snprintf(char *buffer, unsigned long length, const char *format, + ...); +} // namespace __sanitizer + +extern "C" { + +typedef uint64_t u64; + +bool __llvm_symbolize_code(const char *ModuleName, uint64_t ModuleOffset, + char *Buffer, int MaxLength) { + std::string Result; + { + llvm::raw_string_ostream OS(Result); + llvm::symbolize::DIPrinter Printer(OS); + auto ResOrErr = + getDefaultSymbolizer()->symbolizeInlinedCode(ModuleName, ModuleOffset); + Printer << (ResOrErr ? 
ResOrErr.get() : llvm::DIInliningInfo()); + } + __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str()); + return true; +} + +bool __llvm_symbolize_data(const char *ModuleName, uint64_t ModuleOffset, + char *Buffer, int MaxLength) { + std::string Result; + { + llvm::raw_string_ostream OS(Result); + llvm::symbolize::DIPrinter Printer(OS); + auto ResOrErr = + getDefaultSymbolizer()->symbolizeData(ModuleName, ModuleOffset); + Printer << (ResOrErr ? ResOrErr.get() : llvm::DIGlobal()); + } + __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str()); + return true; +} + +void __llvm_symbolize_flush() { getDefaultSymbolizer()->flush(); } + +int __llvm_symbolize_demangle(const char *Name, char *Buffer, int MaxLength) { + std::string Result = + llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr); + __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str()); + return static_cast(Result.size() + 1); +} + +} // extern "C" Index: lib/sanitizer_common/symbolizer/sanitizer_symbolizer_hooks.cc =================================================================== --- /dev/null +++ lib/sanitizer_common/symbolizer/sanitizer_symbolizer_hooks.cc @@ -0,0 +1,43 @@ +//===-- sanitizer_symbolizer_hooks.cc ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hooks from sanitizer_symbolizer_posix_libcdep.cc. 
//
//===----------------------------------------------------------------------===//

// NOTE(review): the operand of this include was lost from this copy of the
// patch; <stdint.h> supplies the uint64_t used below -- confirm against the
// original change.
#include <stdint.h>

extern "C" {

// Entry points this file forwards to; they are only declared here.
bool __llvm_symbolize_code(const char *ModuleName, uint64_t ModuleOffset,
                           char *Buffer, int MaxLength);
bool __llvm_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
                           char *Buffer, int MaxLength);
void __llvm_symbolize_flush();
int __llvm_symbolize_demangle(const char *Name, char *Buffer, int MaxLength);

// Override weak functions in sanitizer runtime. Each hook passes its
// arguments through unchanged and returns the callee's result.

// Forwards to __llvm_symbolize_code.
bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset,
                                char *Buffer, int MaxLength) {
  return __llvm_symbolize_code(ModuleName, ModuleOffset, Buffer, MaxLength);
}

// Forwards to __llvm_symbolize_data.
bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
                                char *Buffer, int MaxLength) {
  return __llvm_symbolize_data(ModuleName, ModuleOffset, Buffer, MaxLength);
}

// Forwards to __llvm_symbolize_flush.
void __sanitizer_symbolize_flush() { __llvm_symbolize_flush(); }

// Forwards to __llvm_symbolize_demangle.
int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
                                   int MaxLength) {
  return __llvm_symbolize_demangle(Name, Buffer, MaxLength);
}

}  // extern "C"

// ==== Index: lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc (new file) ====
//===-- sanitizer_wrappers.cc -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Redirect some functions to sanitizer interceptors.
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include + +// Need to match ../sanitizer_common/sanitizer_internal_defs.h +#if defined(ARCH_PPC) +#define OFF_T unsigned long +#else +#define OFF_T unsigned long long +#endif + +namespace __sanitizer { +unsigned long internal_open(const char *filename, int flags); +unsigned long internal_open(const char *filename, int flags, unsigned mode); +unsigned long internal_close(int fd); +unsigned long internal_stat(const char *path, void *buf); +unsigned long internal_lstat(const char *path, void *buf); +unsigned long internal_fstat(int fd, void *buf); +size_t internal_strlen(const char *s); +unsigned long internal_mmap(void *addr, unsigned long length, int prot, + int flags, int fd, OFF_T offset); +void *internal_memcpy(void *dest, const void *src, unsigned long n); +// Used to propagate errno. +bool internal_iserror(unsigned long retval, int *rverrno = 0); +} // namespace __sanitizer + +namespace { + +template +struct GetTypes; + +template +struct GetTypes { + using Result = R; + template + struct Arg { + using Type = typename std::tuple_element>::type; + }; +}; + +#define LLVM_SYMBOLIZER_GET_FUNC(Function) \ + ((__interceptor_##Function) \ + ? (__interceptor_##Function) \ + : reinterpret_cast(dlsym(RTLD_NEXT, #Function))) + +#define LLVM_SYMBOLIZER_INTERCEPTOR1(Function, ...) \ + GetTypes<__VA_ARGS__>::Result __interceptor_##Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type) __attribute__((weak)); \ + GetTypes<__VA_ARGS__>::Result Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type arg0) { \ + return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0); \ + } + +#define LLVM_SYMBOLIZER_INTERCEPTOR2(Function, ...) 
\ + GetTypes<__VA_ARGS__>::Result __interceptor_##Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type) __attribute__((weak)); \ + GetTypes<__VA_ARGS__>::Result Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type arg0, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type arg1) { \ + return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1); \ + } + +#define LLVM_SYMBOLIZER_INTERCEPTOR3(Function, ...) \ + GetTypes<__VA_ARGS__>::Result __interceptor_##Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type, \ + GetTypes<__VA_ARGS__>::Arg<2>::Type) __attribute__((weak)); \ + GetTypes<__VA_ARGS__>::Result Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type arg0, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type arg1, \ + GetTypes<__VA_ARGS__>::Arg<2>::Type arg2) { \ + return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1, arg2); \ + } + +#define LLVM_SYMBOLIZER_INTERCEPTOR4(Function, ...) \ + GetTypes<__VA_ARGS__>::Result __interceptor_##Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type, \ + GetTypes<__VA_ARGS__>::Arg<2>::Type, \ + GetTypes<__VA_ARGS__>::Arg<3>::Type) __attribute__((weak)); \ + GetTypes<__VA_ARGS__>::Result Function( \ + GetTypes<__VA_ARGS__>::Arg<0>::Type arg0, \ + GetTypes<__VA_ARGS__>::Arg<1>::Type arg1, \ + GetTypes<__VA_ARGS__>::Arg<2>::Type arg2, \ + GetTypes<__VA_ARGS__>::Arg<3>::Type arg3) { \ + return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1, arg2, arg3); \ + } + +} // namespace + +// C-style interface around internal sanitizer libc functions. +extern "C" { + +// Libc++ calls these hooks from __cxa_guard_acquire. Normally, they should go +// into google3 fiber scheduler code. But in the canned symbolizer they are +// both unnecessary and lead to deadlocks as symbolizer calls arbitrary google3 +// code during race reporting. 
+void __google_potentially_blocking_region_begin() {} +void __google_potentially_blocking_region_end() {} + +#define RETURN_OR_SET_ERRNO(T, res) \ + int rverrno; \ + if (__sanitizer::internal_iserror(res, &rverrno)) { \ + errno = rverrno; \ + return (T)-1; \ + } \ + return (T)res; + +int open(const char *filename, int flags, ...) { + unsigned long res; + if (flags | O_CREAT) { + va_list va; + va_start(va, flags); + unsigned mode = va_arg(va, unsigned); + va_end(va); + res = __sanitizer::internal_open(filename, flags, mode); + } else { + res = __sanitizer::internal_open(filename, flags); + } + RETURN_OR_SET_ERRNO(int, res); +} + +int close(int fd) { + unsigned long res = __sanitizer::internal_close(fd); + RETURN_OR_SET_ERRNO(int, res); +} + +#define STAT(func, arg, buf) \ + unsigned long res = __sanitizer::internal_##func(arg, buf); \ + RETURN_OR_SET_ERRNO(int, res); + +int stat(const char *path, struct stat *buf) { STAT(stat, path, buf); } + +int lstat(const char *path, struct stat *buf) { STAT(lstat, path, buf); } + +int fstat(int fd, struct stat *buf) { STAT(fstat, fd, buf); } + +// Redirect versioned stat functions to the __sanitizer::internal() as well. 
+int __xstat(int version, const char *path, struct stat *buf) { + STAT(stat, path, buf); +} + +int __lxstat(int version, const char *path, struct stat *buf) { + STAT(lstat, path, buf); +} + +int __fxstat(int version, int fd, struct stat *buf) { STAT(fstat, fd, buf); } + +size_t strlen(const char *s) { return __sanitizer::internal_strlen(s); } + +void *mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) { + unsigned long res = __sanitizer::internal_mmap( + addr, (unsigned long)length, prot, flags, fd, (unsigned long long)offset); + RETURN_OR_SET_ERRNO(void *, res); +} + +LLVM_SYMBOLIZER_INTERCEPTOR3(read, ssize_t(int, void *, size_t)) +LLVM_SYMBOLIZER_INTERCEPTOR4(pread, ssize_t(int, void *, size_t, off_t)) +LLVM_SYMBOLIZER_INTERCEPTOR4(pread64, ssize_t(int, void *, size_t, off64_t)) +LLVM_SYMBOLIZER_INTERCEPTOR2(realpath, char *(const char *, char *)) + +} // extern "C" Index: lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh =================================================================== --- /dev/null +++ lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh @@ -0,0 +1,154 @@ +#!/bin/bash -eu + +# Run as: CLANG=bin/clang ZLIB_SRC=src/zlib build_symbolizer.sh +# You can download zlib sources from http://www.zlib.net. + +# Script to produce perfect, entirely self-contained symbolization library +# from libc++ and LLVM sources (and zlib, too). It internalizes symbols in these libs, +# so that this library may be linked into arbitrary programs and be invoked +# by Sanitizer runtime libraries to symbolize code/data in-process. +# The output of this script is a single-file archive which +# * includes a private copy of libc++, libz and some LLVM libs, +# * exports nothing except for the symbolizer interface, +# * has several libc functions redirected to sanitizer internal implementation, +# adding a dependency on Sanitizer runtime library. + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +SRC_DIR=$(readlink -f $SCRIPT_DIR/..) 
# All build products go under ./symbolizer in the current directory.
BUILD_DIR=$(readlink -f ./symbolizer)
mkdir -p "$BUILD_DIR"

# LLVM source root: six directory levels above this script.
LLVM_SRC=$(readlink -f "$SCRIPT_DIR/../../../../../..")

if [[ "$LLVM_SRC" == "" ||
      ! -d "${LLVM_SRC}/projects/libcxxabi" ||
      ! -d "${LLVM_SRC}/projects/libcxx" ]]; then
  echo "Missing or incomplete LLVM_SRC"
  exit 1
fi
LLVM_SRC=$(readlink -f "$LLVM_SRC")

# ZLIB_SRC is supplied by the caller. The script runs with `-u`, so default it
# to empty first: otherwise an unset ZLIB_SRC aborts with "unbound variable"
# before the message below can be printed.
ZLIB_SRC="${ZLIB_SRC:-}"
if [[ "$ZLIB_SRC" == "" ||
      ! -x "${ZLIB_SRC}/configure" ||
      ! -f "${ZLIB_SRC}/zlib.h" ]]; then
  echo "Missing or incomplete ZLIB_SRC"
  exit 1
fi
ZLIB_SRC=$(readlink -f "$ZLIB_SRC")

# Parallelism passed to `make -j`; override with J=<n>.
J="${J:-50}"

# All tools are taken from the directory that contains $CLANG.
CLANG="${CLANG:-$(which clang)}"
CLANG_DIR=$(readlink -f "$(dirname "$CLANG")")

CC=$CLANG_DIR/clang
CXX=$CLANG_DIR/clang++
TBLGEN=$CLANG_DIR/llvm-tblgen
LINK=$CLANG_DIR/llvm-link
OPT=$CLANG_DIR/opt
AR=$CLANG_DIR/llvm-ar

if [[ ! -x "$CC" ||
      ! -x "$CXX" ||
      ! -x "$TBLGEN" ||
      ! -x "$LINK" ||
      ! -x "$OPT" ||
      ! -x "$AR" ]]; then
  echo "Missing or incomplete CLANG_DIR"
  exit 1
fi

# libc++abi can't be built without exceptions or rtti
ZLIB_BUILD=${BUILD_DIR}/zlib
LIBCXX_BUILD=${BUILD_DIR}/libcxx
LLVM_BUILD=${BUILD_DIR}/llvm
SANITIZER_LLVM_BUILD=${BUILD_DIR}/sanitizer

# Caller-provided FLAGS come first, followed by the flags every stage uses.
FLAGS=${FLAGS:-}
FLAGS="$FLAGS -w -fPIC -flto -Os -g0 -fno-exceptions -DNDEBUG"

# Build zlib.
mkdir -p "${ZLIB_BUILD}"
cd "${ZLIB_BUILD}"
cp -r "${ZLIB_SRC}"/* .
CC="$CC" CFLAGS="$FLAGS" RANLIB=/bin/true ./configure --static
make clean
make -j${J} libz.a

# Build and install libcxxabi and libcxx.
# cmake is run only when the build directory does not exist yet; the objects
# are cleaned and rebuilt on every run.
if [[ ! -d "${LIBCXX_BUILD}" ]]; then
  mkdir -p "${LIBCXX_BUILD}"
  cd "${LIBCXX_BUILD}"
  cmake -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER="$CC" \
    -DCMAKE_CXX_COMPILER="$CXX" \
    -DCMAKE_C_FLAGS_RELEASE="${FLAGS} -I${LLVM_SRC}/projects/libcxxabi/include" \
    -DCMAKE_CXX_FLAGS_RELEASE="${FLAGS} -I${LLVM_SRC}/projects/libcxxabi/include" \
    -DLIBCXXABI_ENABLE_ASSERTIONS=OFF \
    -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF \
    -DLIBCXXABI_ENABLE_SHARED=OFF \
    -DLIBCXXABI_ENABLE_THREADS=OFF \
    -DLIBCXX_ENABLE_ASSERTIONS=OFF \
    -DLIBCXX_ENABLE_EXCEPTIONS=OFF \
    -DLIBCXX_ENABLE_RTTI=OFF \
    -DLIBCXX_ENABLE_SHARED=OFF \
    -DLIBCXX_ENABLE_THREADS=OFF \
    "$LLVM_SRC"
fi
cd "${LIBCXX_BUILD}"
ninja -t clean
ninja cxx_objects cxxabi_objects

# From now on, use libraries we've just built.
FLAGS="${FLAGS} -fno-rtti -I${ZLIB_BUILD} -I${LIBCXX_BUILD}/include/c++/v1"

# Build LLVM.
# Same scheme as above: configure once, clean and rebuild every run.
if [[ ! -d "${LLVM_BUILD}" ]]; then
  mkdir -p "${LLVM_BUILD}"
  cd "${LLVM_BUILD}"
  cmake -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER="$CC" \
    -DCMAKE_CXX_COMPILER="$CXX" \
    -DCMAKE_C_FLAGS_RELEASE="${FLAGS}" \
    -DCMAKE_CXX_FLAGS_RELEASE="${FLAGS}" \
    -DLLVM_TABLEGEN="$TBLGEN" \
    -DLLVM_ENABLE_ZLIB=ON \
    -DLLVM_ENABLE_TERMINFO=OFF \
    -DLLVM_ENABLE_THREADS=OFF \
    "$LLVM_SRC"
fi
cd "${LLVM_BUILD}"
ninja -t clean
ninja LLVMSymbolize LLVMObject LLVMDebugInfoDWARF LLVMSupport LLVMDebugInfoPDB LLVMMC

# Start from an empty directory for the final merge.
cd "${BUILD_DIR}"
rm -rf "${SANITIZER_LLVM_BUILD}"
mkdir "${SANITIZER_LLVM_BUILD}"
cd "${SANITIZER_LLVM_BUILD}"

# Run llvm-link on every .o found under BUILD_DIR, writing each result to a
# fresh temp file here; inputs llvm-link rejects are reported and skipped.
# The jobs run in the background and are awaited together. The loop
# word-splits find's output, so object paths must not contain whitespace.
for F in `find ${BUILD_DIR} -type f -regex .*\\\\.o$`; do
  ( ($LINK $F -o $(mktemp XXXXXX.o) 2>/dev/null) || (echo "skipping non-llvm: $F") ) &
done
wait

# Compile the symbolizer glue sources into this directory as well.
# $FLAGS is intentionally unquoted: it holds several separate options.
FLAGS="$FLAGS -std=c++11 -I${LLVM_SRC}/include -I${LLVM_BUILD}/include"
"$CXX" $FLAGS "${SRC_DIR}/sanitizer_symbolize.cc" "${SRC_DIR}/sanitizer_wrappers.cc" -c

# Symbols that stay public after internalization.
SYMBOLIZER_API_LIST=__llvm_symbolize_code,__llvm_symbolize_data,__llvm_symbolize_flush,__llvm_symbolize_demangle

# Merge all the object files together and copy the
# resulting library back.
# $FLAGS is intentionally unquoted: it holds several separate options.
"$LINK" *.o -o all.bc
"$OPT" -internalize -internalize-public-api-list=${SYMBOLIZER_API_LIST} all.bc -o opt.bc
"$CC" $FLAGS -fno-lto -c opt.bc
"$AR" rcs libsymbolizer.a opt.o

echo "Checking undefined symbols..."
# Global symbols of the merged object: first two fields of nm's posix output,
# sorted and de-duplicated in the C locale.
nm -f posix -g opt.o | cut -f 1,2 -d ' ' | LC_COLLATE=C sort -u > undefined.new
# Any line that diff marks as added ("+x", but not the "+++" file header) is a
# symbol missing from the checked-in list. grep prints those lines and the
# build fails. Lines present only in the checked-in list are not an error.
if diff -u "$SCRIPT_DIR/global_symbols.txt" undefined.new | grep -E "^\+[^+]"; then
  echo "Failed: unexpected symbols"
  exit 1
fi

# The archive is placed next to the build directory.
cp libsymbolizer.a "$BUILD_DIR/.."

echo "Success!"

# ==== Index: lib/sanitizer_common/symbolizer/scripts/global_symbols.txt (new file) ====
_ZN11__sanitizer13internal_mmapEPvmiiiy U
_ZN11__sanitizer13internal_openEPKcij U
_ZN11__sanitizer13internal_statEPKcPv U
_ZN11__sanitizer14internal_closeEi U
_ZN11__sanitizer14internal_fstatEiPv U
_ZN11__sanitizer14internal_lstatEPKcPv U
_ZN11__sanitizer15internal_strlenEPKc U
_ZN11__sanitizer16internal_iserrorEmPi U
_ZN11__sanitizer17internal_snprintfEPcmPKcz U
__ctype_b_loc U
__ctype_get_mb_cur_max U
__cxa_atexit U
__dso_handle U
__errno_location U
__interceptor_pread w
__interceptor_read w
__interceptor_realpath w
__llvm_symbolize_code T
__llvm_symbolize_data T
__llvm_symbolize_demangle T
__llvm_symbolize_flush T
__strdup U
_exit U
abort U
access U
calloc U
catclose U
catgets U
catopen U
ceil U
cfgetospeed U
dl_iterate_phdr U
dlsym U
dup2 U
environ U
execv U
exit U
fclose U
fflush U
fileno U
fopen U
fork U
fprintf U
fputc U
free U
freelocale U
fwrite U
getc U
getcwd U
getenv U
getpagesize U
getpid U
gettimeofday U
ioctl U
isatty U
isprint U
isupper U
isxdigit U
log10 U
lseek U
malloc U
mbrlen U
mbrtowc U
mbsnrtowcs U
mbsrtowcs U
mbtowc U
memchr U
memcmp U
memcpy U
memmove U
memset U
mkdir U
munmap U
newlocale U
perror U
posix_spawn U
posix_spawn_file_actions_adddup2 U
+posix_spawn_file_actions_addopen U +posix_spawn_file_actions_destroy U +posix_spawn_file_actions_init U +qsort U +rand U +readlink U +realloc U +remove U +setvbuf U +sigfillset U +sigprocmask U +snprintf U +sprintf U +srand U +sscanf U +stderr U +stdin U +stdout U +strcat U +strchr U +strcmp U +strcpy U +strdup U +strerror U +strerror_r U +strftime_l U +strncmp U +strncpy U +strrchr U +strsep U +strtod_l U +strtof_l U +strtol U +strtold_l U +strtoll_l U +strtoull_l U +tcgetattr U +uname U +ungetc U +unlink U +uselocale U +vasprintf U +vfprintf U +vsnprintf U +vsscanf U +waitpid U +wcrtomb U +wcslen U +wcsnrtombs U +wmemcpy U +wmemmove U +wmemset U +write U