Index: lib/sanitizer_common/CMakeLists.txt =================================================================== --- lib/sanitizer_common/CMakeLists.txt +++ lib/sanitizer_common/CMakeLists.txt @@ -26,6 +26,7 @@ sanitizer_suppressions.cc sanitizer_symbolizer.cc sanitizer_symbolizer_libbacktrace.cc + sanitizer_symbolizer_mac.cc sanitizer_symbolizer_win.cc sanitizer_tls_get_addr.cc sanitizer_thread_registry.cc Index: lib/sanitizer_common/sanitizer_common.h =================================================================== --- lib/sanitizer_common/sanitizer_common.h +++ lib/sanitizer_common/sanitizer_common.h @@ -68,6 +68,15 @@ void DecreaseTotalMmap(uptr size); uptr GetRSS(); +// Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr +// is extracted. When extracting a string, a newly allocated (using +// InternalAlloc) and null-terminated buffer is returned. They return a pointer +// to the next character after the found delimiter. +const char *ExtractToken(const char *str, const char *delims, char **result); +const char *ExtractUptr(const char *str, const char *delims, uptr *result); +const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, + char **result); + // InternalScopedBuffer can be used instead of large stack arrays to // keep frame size low. // FIXME: use InternalAlloc instead of MmapOrDie once Index: lib/sanitizer_common/sanitizer_common.cc =================================================================== --- lib/sanitizer_common/sanitizer_common.cc +++ lib/sanitizer_common/sanitizer_common.cc @@ -11,6 +11,7 @@ // run-time libraries. 
//===----------------------------------------------------------------------===// +#include "sanitizer_allocator_internal.h" #include "sanitizer_common.h" #include "sanitizer_flags.h" #include "sanitizer_libc.h" @@ -272,6 +273,49 @@ atomic_fetch_sub(&g_total_mmaped, size, memory_order_relaxed); } +// Extracts the prefix of "str" that consists of any characters not +// present in "delims" string, and copies this prefix to "result", allocating +// space for it. +// Returns a pointer to "str" after skipping extracted prefix and first +// delimiter char. +const char *ExtractToken(const char *str, const char *delims, char **result) { + uptr prefix_len = internal_strcspn(str, delims); + *result = (char *)InternalAlloc(prefix_len + 1); + internal_memcpy(*result, str, prefix_len); + (*result)[prefix_len] = '\0'; + const char *prefix_end = str + prefix_len; + if (*prefix_end != '\0') + prefix_end++; + return prefix_end; +} + +// Same as ExtractToken, but converts extracted token to uptr. +const char *ExtractUptr(const char *str, const char *delims, uptr *result) { + char *buff; + const char *ret = ExtractToken(str, delims, &buff); + if (buff != 0) { + *result = (uptr)internal_atoll(buff); + } + InternalFree(buff); + return ret; +} + +// Similar to ExtractToken, but looks only for a single delimiter which can +// be multiple characters long. +const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, + char **result) { + const char *found_delimiter = internal_strstr(str, delimiter); + uptr prefix_len = + found_delimiter ? 
found_delimiter - str : internal_strlen(str); + *result = (char *)InternalAlloc(prefix_len + 1); + internal_memcpy(*result, str, prefix_len); + (*result)[prefix_len] = '\0'; + const char *prefix_end = str + prefix_len; + if (*prefix_end != '\0') + prefix_end += internal_strlen(delimiter); + return prefix_end; +} + } // namespace __sanitizer using namespace __sanitizer; // NOLINT Index: lib/sanitizer_common/sanitizer_symbolizer.h =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer.h +++ lib/sanitizer_common/sanitizer_symbolizer.h @@ -137,6 +137,68 @@ }; }; +class SymbolizerInterface { + public: + // Can't declare pure virtual functions in sanitizer runtimes: + // __cxa_pure_virtual might be unavailable. + virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) { + UNIMPLEMENTED(); + } + + virtual bool SymbolizeData(uptr addr, DataInfo *info) { + UNIMPLEMENTED(); + } + + protected: + static const uptr kBufferSize = 16 * 1024; + char buffer_[kBufferSize]; +}; + +// SymbolizerProcess encapsulates communication between the tool and +// external symbolizer program, running in a different subprocess. +// SymbolizerProcess may not be used from two threads simultaneously. 
+class SymbolizerProcess { + public: + explicit SymbolizerProcess(const char *path) + : path_(path), + fd_to_child_(kInvalidFd), + times_restarted_(0), + failed_to_start_(false), + reported_invalid_path_(false) { + CHECK(path_); + CHECK_NE(path_[0], '\0'); + } + + char *SendCommand(const char *command); + + protected: + bool Restart(); + char *SendCommandImpl(const char *command); + bool ReadFromSymbolizer(char *buffer, uptr max_length); + bool WriteToSymbolizer(const char *buffer, uptr length); + bool StartSymbolizerSubprocess(); + + virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { + UNIMPLEMENTED(); + } + + virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const { + UNIMPLEMENTED(); + } + + const char *path_; + fd_t fd_to_child_; + + static const uptr kBufferSize = 16 * 1024; + char buffer_[kBufferSize]; + + static const uptr kMaxTimesRestarted = 5; + static const int kSymbolizerStartupTimeMillis = 10; + uptr times_restarted_; + bool failed_to_start_; + bool reported_invalid_path_; +}; + } // namespace __sanitizer #endif // SANITIZER_SYMBOLIZER_H Index: lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.h =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.h +++ lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.h @@ -28,12 +28,11 @@ namespace __sanitizer { -class LibbacktraceSymbolizer { +class LibbacktraceSymbolizer : public SymbolizerInterface { public: static LibbacktraceSymbolizer *get(LowLevelAllocator *alloc); - SymbolizedStack *SymbolizeCode(uptr addr, const char *module_name, - uptr module_offset); + bool SymbolizePC(uptr addr, SymbolizedStack *stack); bool SymbolizeData(uptr addr, DataInfo *info); Index: lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.cc =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.cc +++ 
lib/sanitizer_common/sanitizer_symbolizer_libbacktrace.cc @@ -156,9 +156,10 @@ return new(*alloc) LibbacktraceSymbolizer(state); } -SymbolizedStack *LibbacktraceSymbolizer::SymbolizeCode(uptr addr, - const char *module_name, - uptr module_offset) { +bool LibbacktraceSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { + const char *module_name = stack->info.module; + uptr module_offset = stack->info.module_offset; + SymbolizeCodeCallbackArg data; data.first = nullptr; data.last = nullptr; @@ -185,9 +186,7 @@ return 0; } -SymbolizedStack *LibbacktraceSymbolizer::SymbolizeCode(uptr addr, - const char *module_name, - uptr module_offset) { +bool LibbacktraceSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { (void)state_; return nullptr; } Index: lib/sanitizer_common/sanitizer_symbolizer_mac.h =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer_mac.h +++ lib/sanitizer_common/sanitizer_symbolizer_mac.h @@ -0,0 +1,57 @@ +//===-- sanitizer_symbolizer_mac.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is shared between various sanitizers' runtime libraries. +// +// Header for Mac-specific "atos" symbolizer. 
+//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_SYMBOLIZER_MAC_H +#define SANITIZER_SYMBOLIZER_MAC_H + +#include "sanitizer_symbolizer.h" + +#include + +namespace __sanitizer { + +class AtosSymbolizerProcess : public SymbolizerProcess { + public: + explicit AtosSymbolizerProcess(const char *path, pid_t parent_pid) + : SymbolizerProcess(path), parent_pid_(parent_pid) {} + + private: + virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { + return (length >= 1 && buffer_[length - 1] == '\n'); + } + + virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const { + char pid_str[16] = {0}; + internal_snprintf(pid_str, sizeof(pid_str), "%d", parent_pid_); + execl(path_, path_, "-p", pid_str, (char *)0); + } + + pid_t parent_pid_; +}; + +class AtosSymbolizer : public SymbolizerInterface { + public: + explicit AtosSymbolizer(const char *path) + : process_(new AtosSymbolizerProcess(path, getpid())) {} + + bool SymbolizePC(uptr addr, SymbolizedStack *stack); + bool SymbolizeData(uptr addr, DataInfo *info); + + private: + AtosSymbolizerProcess *process_; +}; + +} // namespace __sanitizer + +#endif // SANITIZER_SYMBOLIZER_MAC_H Index: lib/sanitizer_common/sanitizer_symbolizer_mac.cc =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer_mac.cc +++ lib/sanitizer_common/sanitizer_symbolizer_mac.cc @@ -0,0 +1,66 @@ +//===-- sanitizer_symbolizer_mac.cc ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is shared between various sanitizers' runtime libraries. +// +// Implementation of Mac-specific "atos" symbolizer. 
+//===----------------------------------------------------------------------===// + +#include "sanitizer_allocator_internal.h" +#include "sanitizer_symbolizer.h" +#include "sanitizer_symbolizer_mac.h" + +namespace __sanitizer { + +void ParseCommandOutput(const char *str, SymbolizedStack *res) { + // Trim ending newlines. + char *trimmed_str; + ExtractTokenUpToDelimiter(str, "\n", &trimmed_str); + + // The line from `atos` is in one of these formats: + // myfunction (in library.dylib) (sourcefile.c:17) + // myfunction (in library.dylib) + 0x1fe + // 0xdeadbeef (in library.dylib) + 0x1fe + // 0xdeadbeef (in library.dylib) + // 0xdeadbeef + const char *rest = trimmed_str; + char *function_name; + rest = ExtractTokenUpToDelimiter(rest, " (in ", &function_name); + if (internal_strncmp(function_name, "0x", 2) != 0) + res->info.function = function_name; + else + InternalFree(function_name); + rest = ExtractTokenUpToDelimiter(rest, ") ", &res->info.module); + + if (rest[0] == '(') { + rest++; + rest = ExtractTokenUpToDelimiter(rest, ":", &res->info.file); + char *extracted_line_number; + rest = ExtractTokenUpToDelimiter(rest, ")", &extracted_line_number); + res->info.line = internal_atoll(extracted_line_number); + InternalFree(extracted_line_number); + } + + InternalFree(trimmed_str); +} + +bool AtosSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { + internal_snprintf(buffer_, kBufferSize, "0x%zx\n", addr); + char *buf = process_->SendCommand(buffer_); + if (!buf) + return false; + ParseCommandOutput(buf, stack); + return true; +} + +bool AtosSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { + return false; +} + +} // namespace __sanitizer Index: lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc =================================================================== --- lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc +++ lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc @@ -23,11 +23,14 @@ #include "sanitizer_procmaps.h" #include 
"sanitizer_symbolizer.h" #include "sanitizer_symbolizer_libbacktrace.h" +#include "sanitizer_symbolizer_mac.h" +#include #include #include #include #include +#include // C++ demangling function, as required by Itanium C++ ABI. This is weak, // because we do not require a C++ ABI library to be linked to a program @@ -55,258 +58,191 @@ return name; } -// Extracts the prefix of "str" that consists of any characters not -// present in "delims" string, and copies this prefix to "result", allocating -// space for it. -// Returns a pointer to "str" after skipping extracted prefix and first -// delimiter char. -static const char *ExtractToken(const char *str, const char *delims, - char **result) { - uptr prefix_len = internal_strcspn(str, delims); - *result = (char*)InternalAlloc(prefix_len + 1); - internal_memcpy(*result, str, prefix_len); - (*result)[prefix_len] = '\0'; - const char *prefix_end = str + prefix_len; - if (*prefix_end != '\0') prefix_end++; - return prefix_end; -} - -// Same as ExtractToken, but converts extracted token to integer. -static const char *ExtractInt(const char *str, const char *delims, - int *result) { - char *buff; - const char *ret = ExtractToken(str, delims, &buff); - if (buff != 0) { - *result = (int)internal_atoll(buff); - } - InternalFree(buff); - return ret; -} - -static const char *ExtractUptr(const char *str, const char *delims, - uptr *result) { - char *buff; - const char *ret = ExtractToken(str, delims, &buff); - if (buff != 0) { - *result = (uptr)internal_atoll(buff); +char *SymbolizerProcess::SendCommand(const char *command) { + for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { + // Start or restart symbolizer if we failed to send command to it. + if (char *res = SendCommandImpl(command)) + return res; + Restart(); } - InternalFree(buff); - return ret; -} - -class ExternalSymbolizerInterface { - public: - // Can't declare pure virtual functions in sanitizer runtimes: - // __cxa_pure_virtual might be unavailable. 
- virtual char *SendCommand(bool is_data, const char *module_name, - uptr module_offset) { - UNIMPLEMENTED(); + if (!failed_to_start_) { + Report("WARNING: Failed to use and restart external symbolizer!\n"); + failed_to_start_ = true; } -}; + return 0; +} -// SymbolizerProcess encapsulates communication between the tool and -// external symbolizer program, running in a different subprocess. -// SymbolizerProcess may not be used from two threads simultaneously. -class SymbolizerProcess : public ExternalSymbolizerInterface { - public: - explicit SymbolizerProcess(const char *path) - : path_(path), - input_fd_(kInvalidFd), - output_fd_(kInvalidFd), - times_restarted_(0), - failed_to_start_(false), - reported_invalid_path_(false) { - CHECK(path_); - CHECK_NE(path_[0], '\0'); +bool SymbolizerProcess::Restart() { + if (fd_to_child_ != kInvalidFd) { + internal_close(fd_to_child_); + fd_to_child_ = kInvalidFd; } + return StartSymbolizerSubprocess(); +} - char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { - for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { - // Start or restart symbolizer if we failed to send command to it. 
- if (char *res = SendCommandImpl(is_data, module_name, module_offset)) - return res; - Restart(); - } - if (!failed_to_start_) { - Report("WARNING: Failed to use and restart external symbolizer!\n"); - failed_to_start_ = true; - } +char *SymbolizerProcess::SendCommandImpl(const char *command) { + if (fd_to_child_ == kInvalidFd) return 0; - } - - private: - bool Restart() { - if (input_fd_ != kInvalidFd) - internal_close(input_fd_); - if (output_fd_ != kInvalidFd) - internal_close(output_fd_); - return StartSymbolizerSubprocess(); - } - - char *SendCommandImpl(bool is_data, const char *module_name, - uptr module_offset) { - if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) - return 0; - CHECK(module_name); - if (!RenderInputCommand(buffer_, kBufferSize, is_data, module_name, - module_offset)) - return 0; - if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) - return 0; - if (!readFromSymbolizer(buffer_, kBufferSize)) - return 0; - return buffer_; - } + if (!WriteToSymbolizer(command, internal_strlen(command))) + return 0; + if (!ReadFromSymbolizer(buffer_, kBufferSize)) + return 0; + return buffer_; +} - bool readFromSymbolizer(char *buffer, uptr max_length) { - if (max_length == 0) - return true; - uptr read_len = 0; - while (true) { - uptr just_read = internal_read(input_fd_, buffer + read_len, - max_length - read_len - 1); - // We can't read 0 bytes, as we don't expect external symbolizer to close - // its stdout. 
- if (just_read == 0 || just_read == (uptr)-1) { - Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); - return false; - } - read_len += just_read; - if (ReachedEndOfOutput(buffer, read_len)) - break; - } - buffer[read_len] = '\0'; +bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { + if (max_length == 0) return true; - } - - bool writeToSymbolizer(const char *buffer, uptr length) { - if (length == 0) - return true; - uptr write_len = internal_write(output_fd_, buffer, length); - if (write_len == 0 || write_len == (uptr)-1) { - Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); + uptr read_len = 0; + while (true) { + uptr just_read = internal_read(fd_to_child_, buffer + read_len, + max_length - read_len - 1); + // We can't read 0 bytes, as we don't expect external symbolizer to close + // its stdout. + if (just_read == 0 || internal_iserror(just_read)) { + Report("WARNING: Can't read from symbolizer at fd %d\n", fd_to_child_); return false; } - return true; + read_len += just_read; + if (ReachedEndOfOutput(buffer, read_len)) + break; } + buffer[read_len] = '\0'; + return true; +} - bool StartSymbolizerSubprocess() { - if (!FileExists(path_)) { - if (!reported_invalid_path_) { - Report("WARNING: invalid path to external symbolizer!\n"); - reported_invalid_path_ = true; - } - return false; - } - - int *infd = NULL; - int *outfd = NULL; - // The client program may close its stdin and/or stdout and/or stderr - // thus allowing socketpair to reuse file descriptors 0, 1 or 2. - // In this case the communication between the forked processes may be - // broken if either the parent or the child tries to close or duplicate - // these descriptors. The loop below produces two pairs of file - // descriptors, each greater than 2 (stderr). 
- int sock_pair[5][2]; - for (int i = 0; i < 5; i++) { - if (pipe(sock_pair[i]) == -1) { - for (int j = 0; j < i; j++) { - internal_close(sock_pair[j][0]); - internal_close(sock_pair[j][1]); - } - Report("WARNING: Can't create a socket pair to start " - "external symbolizer (errno: %d)\n", errno); - return false; - } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) { - if (infd == NULL) { - infd = sock_pair[i]; - } else { - outfd = sock_pair[i]; - for (int j = 0; j < i; j++) { - if (sock_pair[j] == infd) continue; - internal_close(sock_pair[j][0]); - internal_close(sock_pair[j][1]); - } - break; - } - } - } - CHECK(infd); - CHECK(outfd); - - // Real fork() may call user callbacks registered with pthread_atfork(). - int pid = internal_fork(); - if (pid == -1) { - // Fork() failed. - internal_close(infd[0]); - internal_close(infd[1]); - internal_close(outfd[0]); - internal_close(outfd[1]); - Report("WARNING: failed to fork external symbolizer " - " (errno: %d)\n", errno); - return false; - } else if (pid == 0) { - // Child subprocess. - internal_close(STDOUT_FILENO); - internal_close(STDIN_FILENO); - internal_dup2(outfd[0], STDIN_FILENO); - internal_dup2(infd[1], STDOUT_FILENO); - internal_close(outfd[0]); - internal_close(outfd[1]); - internal_close(infd[0]); - internal_close(infd[1]); - for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) - internal_close(fd); - ExecuteWithDefaultArgs(path_); - internal__exit(1); - } - - // Continue execution in parent process. - internal_close(outfd[0]); - internal_close(infd[1]); - input_fd_ = infd[0]; - output_fd_ = outfd[1]; - - // Check that symbolizer subprocess started successfully. - int pid_status; - SleepForMillis(kSymbolizerStartupTimeMillis); - int exited_pid = waitpid(pid, &pid_status, WNOHANG); - if (exited_pid != 0) { - // Either waitpid failed, or child has already exited. 
- Report("WARNING: external symbolizer didn't start up correctly!\n"); - return false; - } - +bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { + if (length == 0) return true; + uptr write_len = internal_write(fd_to_child_, buffer, length); + if (write_len == 0 || internal_iserror(write_len)) { + Report("WARNING: Can't write to symbolizer at fd %d\n", fd_to_child_); + return false; } + return true; +} - virtual bool RenderInputCommand(char *buffer, uptr max_length, bool is_data, - const char *module_name, - uptr module_offset) const { - UNIMPLEMENTED(); - } - - virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { - UNIMPLEMENTED(); +bool SymbolizerProcess::StartSymbolizerSubprocess() { + if (!FileExists(path_)) { + if (!reported_invalid_path_) { + Report("WARNING: invalid path to external symbolizer!\n"); + reported_invalid_path_ = true; + } + return false; } - virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const { - UNIMPLEMENTED(); + fd_t fd = kInvalidFd; + // Use forkpty to disable buffering in the new terminal. + int pid = forkpty(&fd, 0, 0, 0); + if (pid == -1) { + // forkpty() failed. + Report("WARNING: failed to fork external symbolizer (errno: %d)\n", errno); + return false; + } else if (pid == 0) { + // Child subprocess. + ExecuteWithDefaultArgs(path_); + internal__exit(1); + } + + // Continue execution in parent process. + fd_to_child_ = fd; + + // Disable echo in the new terminal, disable CR. + struct termios termflags; + tcgetattr(fd_to_child_, &termflags); + termflags.c_oflag &= ~ONLCR; + termflags.c_lflag &= ~ECHO; + tcsetattr(fd_to_child_, TCSANOW, &termflags); + + // Check that symbolizer subprocess started successfully. + int pid_status; + SleepForMillis(kSymbolizerStartupTimeMillis); + int exited_pid = waitpid(pid, &pid_status, WNOHANG); + if (exited_pid != 0) { + // Either waitpid failed, or child has already exited. 
+ Report("WARNING: external symbolizer didn't start up correctly!\n"); + return false; } - const char *path_; - int input_fd_; - int output_fd_; + return true; +} - static const uptr kBufferSize = 16 * 1024; - char buffer_[kBufferSize]; +// Parses a two-line string in the following format: +// <function_name> +// <file_name>:<line_number>:<column_number> +// or this format (without the column number): +// <function_name> +// <file_name>:<line_number> +// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of +// them use the same output format. +static void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { + uptr orig_addr = res->info.address; + const char *orig_module_name = res->info.module; + uptr orig_module_offset = res->info.module_offset; + + bool top_frame = true; + SymbolizedStack *last = res; + while (true) { + char *function_name = 0; + str = ExtractToken(str, "\n", &function_name); + CHECK(function_name); + if (function_name[0] == '\0') { + // There are no more frames. + break; + } + SymbolizedStack *cur; + if (top_frame) { + cur = res; + top_frame = false; + } else { + cur = SymbolizedStack::New(orig_addr); + cur->info.FillAddressAndModuleInfo(orig_addr, orig_module_name, + orig_module_offset); + last->next = cur; + last = cur; + } + + AddressInfo *info = &cur->info; + info->function = function_name; + // Parse <file_name>:<line_number>(:<column_number>) buffer. + char *file_line_info = 0; + str = ExtractToken(str, "\n", &file_line_info); + CHECK(file_line_info); + const char *line_info = ExtractToken(file_line_info, ":", &info->file); + uptr line_number; + line_info = ExtractUptr(line_info, ":", &line_number); + info->line = line_number; + if (*line_info != '\0') { + uptr column_number; + line_info = ExtractUptr(line_info, "", &column_number); + info->column = column_number; + } + InternalFree(file_line_info); + + // Functions and filenames can be "??", in which case we write 0 + // to address info to mark that names are unknown. 
+ if (0 == internal_strcmp(info->function, "??")) { + InternalFree(info->function); + info->function = 0; + } + if (0 == internal_strcmp(info->file, "??")) { + InternalFree(info->file); + info->file = 0; + } + } +} - static const uptr kMaxTimesRestarted = 5; - static const int kSymbolizerStartupTimeMillis = 10; - uptr times_restarted_; - bool failed_to_start_; - bool reported_invalid_path_; -}; +// Parses a two-line string in the following format: +// <symbol_name> +// <start_address> <size> +// Used by LLVMSymbolizer and InternalSymbolizer. +static void ParseSymbolizeDataOutput(const char *str, uptr addr, + DataInfo *info) { + str = ExtractToken(str, "\n", &info->name); + str = ExtractUptr(str, " ", &info->start); + str = ExtractUptr(str, "\n", &info->size); + info->start += (addr - info->module_offset); // Add the module base address. +} // For now we assume the following protocol: // For each request of the form @@ -323,13 +259,6 @@ explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} private: - bool RenderInputCommand(char *buffer, uptr max_length, bool is_data, - const char *module_name, uptr module_offset) const { - internal_snprintf(buffer, max_length, "%s\"%s\" 0x%zx\n", - is_data ? "DATA " : "", module_name, module_offset); - return true; - } - bool ReachedEndOfOutput(const char *buffer, uptr length) const { // Empty line marks the end of llvm-symbolizer output. 
return length >= 2 && buffer[length - 1] == '\n' && @@ -357,6 +286,46 @@ } }; +class LLVMSymbolizer : public SymbolizerInterface { + public: + explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) + : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} + + bool SymbolizePC(uptr addr, SymbolizedStack *stack) { + const char *module_name = stack->info.module; + uptr module_offset = stack->info.module_offset; + char *buffer = + RenderInputCommand(/*is_data*/ false, module_name, module_offset); + char *buf = symbolizer_process_->SendCommand(buffer); + if (!buf) + return false; + ParseSymbolizePCOutput(buf, stack); + return true; + } + + bool SymbolizeData(uptr addr, DataInfo *info) override { + const char *module_name = info->module; + uptr module_offset = info->module_offset; + char *buffer = + RenderInputCommand(/*is_data*/ true, module_name, module_offset); + const char *str = symbolizer_process_->SendCommand(buffer); + if (str == 0) + return true; + ParseSymbolizeDataOutput(str, addr, info); + return true; + } + + private: + char *RenderInputCommand(bool is_data, const char *module_name, + uptr module_offset) { + internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", + is_data ? "DATA " : "", module_name, module_offset); + return buffer_; + } + + LLVMSymbolizerProcess *symbolizer_process_; +}; + class Addr2LineProcess : public SymbolizerProcess { public: Addr2LineProcess(const char *path, const char *module_name) @@ -365,15 +334,6 @@ const char *module_name() const { return module_name_; } private: - bool RenderInputCommand(char *buffer, uptr max_length, bool is_data, - const char *module_name, uptr module_offset) const { - if (is_data) - return false; - CHECK_EQ(0, internal_strcmp(module_name, module_name_)); - internal_snprintf(buffer, max_length, "0x%zx\n", module_offset); - return true; - } - bool ReachedEndOfOutput(const char *buffer, uptr length) const { // Output should consist of two lines. 
int num_lines = 0; @@ -393,16 +353,17 @@ const char *module_name_; // Owned, leaked. }; -class Addr2LinePool : public ExternalSymbolizerInterface { +class Addr2LinePool : public SymbolizerInterface { public: explicit Addr2LinePool(const char *addr2line_path, LowLevelAllocator *allocator) : addr2line_path_(addr2line_path), allocator_(allocator), addr2line_pool_(16) {} - char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { - if (is_data) - return 0; + bool SymbolizePC(uptr addr, SymbolizedStack *stack) { + const char *module_name = stack->info.module; + uptr module_offset = stack->info.module_offset; + Addr2LineProcess *addr2line = 0; for (uptr i = 0; i < addr2line_pool_.size(); ++i) { if (0 == @@ -416,10 +377,26 @@ new(*allocator_) Addr2LineProcess(addr2line_path_, module_name); addr2line_pool_.push_back(addr2line); } - return addr2line->SendCommand(is_data, module_name, module_offset); + + CHECK_EQ(0, internal_strcmp(module_name, addr2line->module_name())); + char *req = RenderInputCommand(module_name, module_offset); + char *buf = addr2line->SendCommand(req); + if (!buf) + return false; + ParseSymbolizePCOutput(buf, stack); + return true; + } + + bool SymbolizeData(uptr addr, DataInfo *info) { + return false; } private: + char *RenderInputCommand(const char *module_name, uptr module_offset) { + internal_snprintf(buffer_, kBufferSize, "0x%zx\n", module_offset); + return buffer_; + } + const char *addr2line_path_; LowLevelAllocator *allocator_; InternalMmapVector addr2line_pool_; @@ -440,7 +417,7 @@ int MaxLength); } // extern "C" -class InternalSymbolizer { +class InternalSymbolizer : public SymbolizerInterface { public: typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); @@ -452,7 +429,25 @@ return 0; } - char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { + bool SymbolizePC(uptr addr, SymbolizedStack *stack) { + char *buf = SymbolizePC(/*is_data*/ false, stack->info.module, + 
stack->info.module_offset); + if (!buf) + return false; + ParseSymbolizePCOutput(buf, stack); + return true; + } + + bool SymbolizeData(uptr addr, DataInfo *info) { + char *buf = + SymbolizePC(/*is_data*/ true, info->module, info->module_offset); + if (!buf) + return false; + ParseSymbolizeDataOutput(buf, addr, info); + return true; + } + + char *SymbolizePC(bool is_data, const char *module_name, uptr module_offset) { SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data : __sanitizer_symbolize_code; if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) @@ -492,93 +487,68 @@ }; #else // SANITIZER_SUPPORTS_WEAK_HOOKS -class InternalSymbolizer { +class InternalSymbolizer : public SymbolizerInterface { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; } - char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { + bool SymbolizePC(uptr addr, SymbolizedStack *stack) { return 0; } + bool SymbolizeData(uptr addr, DataInfo *info) { + return false; + } void Flush() { } const char *Demangle(const char *name) { return name; } }; #endif // SANITIZER_SUPPORTS_WEAK_HOOKS +class DlAddrSymbolizer : public SymbolizerInterface { + public: + bool SymbolizePC(uptr addr, SymbolizedStack *stack) { + Dl_info info; + int result = dladdr((const void *)addr, &info); + if (!result) + return false; + stack->info.function = internal_strdup(info.dli_sname); + return true; + } + bool SymbolizeData(uptr addr, DataInfo *info) { + return false; + } +}; + class POSIXSymbolizer : public Symbolizer { public: - POSIXSymbolizer(ExternalSymbolizerInterface *external_symbolizer, + POSIXSymbolizer(SymbolizerInterface *external_symbolizer, InternalSymbolizer *internal_symbolizer, - LibbacktraceSymbolizer *libbacktrace_symbolizer) + LibbacktraceSymbolizer *libbacktrace_symbolizer, + DlAddrSymbolizer *dladdr_symbolizer) : Symbolizer(), external_symbolizer_(external_symbolizer), internal_symbolizer_(internal_symbolizer), - 
libbacktrace_symbolizer_(libbacktrace_symbolizer) {} + libbacktrace_symbolizer_(libbacktrace_symbolizer), + dladdr_symbolizer_(dladdr_symbolizer) {} SymbolizedStack *SymbolizePC(uptr addr) override { BlockingMutexLock l(&mu_); + + // Always fill data about module name and offset. const char *module_name; uptr module_offset; + SymbolizedStack *res = SymbolizedStack::New(addr); if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) - return SymbolizedStack::New(addr); - // First, try to use libbacktrace symbolizer (if it's available). + return res; + res->info.FillAddressAndModuleInfo(addr, module_name, module_offset); + if (libbacktrace_symbolizer_ != 0) { mu_.CheckLocked(); - if (SymbolizedStack *res = libbacktrace_symbolizer_->SymbolizeCode( - addr, module_name, module_offset)) + if (libbacktrace_symbolizer_->SymbolizePC(addr, res)) return res; } - // Always fill data about module name and offset. - SymbolizedStack *res = SymbolizedStack::New(addr); - res->info.FillAddressAndModuleInfo(addr, module_name, module_offset); - - const char *str = SendCommand(false, module_name, module_offset); - if (str == 0) { - // Symbolizer was not initialized or failed. - return res; - } - - bool top_frame = true; - SymbolizedStack *last = res; - while (true) { - char *function_name = 0; - str = ExtractToken(str, "\n", &function_name); - CHECK(function_name); - if (function_name[0] == '\0') { - // There are no more frames. - break; - } - SymbolizedStack *cur; - if (top_frame) { - cur = res; - top_frame = false; - } else { - cur = SymbolizedStack::New(addr); - cur->info.FillAddressAndModuleInfo(addr, module_name, module_offset); - last->next = cur; - last = cur; - } - AddressInfo *info = &cur->info; - info->function = function_name; - // Parse :: buffer. 
- char *file_line_info = 0; - str = ExtractToken(str, "\n", &file_line_info); - CHECK(file_line_info); - const char *line_info = ExtractToken(file_line_info, ":", &info->file); - line_info = ExtractInt(line_info, ":", &info->line); - line_info = ExtractInt(line_info, "", &info->column); - InternalFree(file_line_info); - - // Functions and filenames can be "??", in which case we write 0 - // to address info to mark that names are unknown. - if (0 == internal_strcmp(info->function, "??")) { - InternalFree(info->function); - info->function = 0; - } - if (0 == internal_strcmp(info->file, "??")) { - InternalFree(info->file); - info->file = 0; - } + bool success = SymbolizePCImpl(addr, res); + if (!success && dladdr_symbolizer_) { + dladdr_symbolizer_->SymbolizePC(addr, res); } return res; } @@ -599,14 +569,7 @@ if (libbacktrace_symbolizer_->SymbolizeData(addr, info)) return true; } - const char *str = SendCommand(true, module_name, module_offset); - if (str == 0) - return true; - str = ExtractToken(str, "\n", &info->name); - str = ExtractUptr(str, " ", &info->start); - str = ExtractUptr(str, "\n", &info->size); - info->start += module->base_address(); - return true; + return SymbolizeDataImpl(addr, info); } bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, @@ -652,21 +615,27 @@ } private: - char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { + SymbolizerInterface *GetSymbolizerInterface() { mu_.CheckLocked(); - // First, try to use internal symbolizer. - if (internal_symbolizer_) { - SymbolizerScope sym_scope(this); - return internal_symbolizer_->SendCommand(is_data, module_name, - module_offset); - } - // Otherwise, fall back to external symbolizer. 
- if (external_symbolizer_) { - SymbolizerScope sym_scope(this); - return external_symbolizer_->SendCommand(is_data, module_name, - module_offset); - } - return 0; + if (internal_symbolizer_) + return internal_symbolizer_; + if (external_symbolizer_) + return external_symbolizer_; + return nullptr; + } + + bool SymbolizePCImpl(uptr addr, SymbolizedStack *stack) { + mu_.CheckLocked(); + SymbolizerInterface *sym = GetSymbolizerInterface(); + SymbolizerScope sym_scope(this); + return sym ? sym->SymbolizePC(addr, stack) : false; + } + + bool SymbolizeDataImpl(uptr addr, DataInfo *info) { + mu_.CheckLocked(); + SymbolizerInterface *sym = GetSymbolizerInterface(); + SymbolizerScope sym_scope(this); + return sym ? sym->SymbolizeData(addr, info) : false; } LoadedModule *FindModuleForAddress(uptr address) { @@ -718,24 +687,35 @@ bool modules_fresh_; BlockingMutex mu_; - ExternalSymbolizerInterface *external_symbolizer_; // Leaked. - InternalSymbolizer *const internal_symbolizer_; // Leaked. + SymbolizerInterface *external_symbolizer_; // Leaked. + InternalSymbolizer *internal_symbolizer_; // Leaked. LibbacktraceSymbolizer *libbacktrace_symbolizer_; // Leaked. + DlAddrSymbolizer *dladdr_symbolizer_; // Leaked. 
}; Symbolizer *Symbolizer::PlatformInit() { if (!common_flags()->symbolize) { - return new(symbolizer_allocator_) POSIXSymbolizer(0, 0, 0); + if (common_flags()->verbosity >= 2) + Report("Symbolizing is disabled.\n"); + return new(symbolizer_allocator_) POSIXSymbolizer(0, 0, 0, 0); } - InternalSymbolizer* internal_symbolizer = + InternalSymbolizer *internal_symbolizer = InternalSymbolizer::get(&symbolizer_allocator_); - ExternalSymbolizerInterface *external_symbolizer = 0; + SymbolizerInterface *external_symbolizer = 0; LibbacktraceSymbolizer *libbacktrace_symbolizer = 0; + DlAddrSymbolizer *dladdr_symbolizer = + new(symbolizer_allocator_) DlAddrSymbolizer(); - if (!internal_symbolizer) { + if (internal_symbolizer) { + if (common_flags()->verbosity >= 2) + Report("Using internal symbolizer.\n"); + } else { libbacktrace_symbolizer = LibbacktraceSymbolizer::get(&symbolizer_allocator_); - if (!libbacktrace_symbolizer) { + if (libbacktrace_symbolizer) { + if (common_flags()->verbosity >= 2) + Report("Using libbacktrace symbolizer.\n"); + } else { const char *path_to_external = common_flags()->external_symbolizer_path; if (path_to_external && path_to_external[0] == '\0') { // External symbolizer is explicitly disabled. Do nothing. @@ -744,21 +724,34 @@ if (!path_to_external) path_to_external = FindPathToBinary("llvm-symbolizer"); if (path_to_external) { + if (common_flags()->verbosity >= 2) + Report("Using llvm-symbolizer at path: %s\n", path_to_external); external_symbolizer = new(symbolizer_allocator_) - LLVMSymbolizerProcess(path_to_external); + LLVMSymbolizer(path_to_external, &symbolizer_allocator_); } else if (common_flags()->allow_addr2line) { // If llvm-symbolizer is not found, try to use addr2line. 
if (const char *addr2line_path = FindPathToBinary("addr2line")) { + if (common_flags()->verbosity >= 2) + Report("Using addr2line at path: %s\n", addr2line_path); external_symbolizer = new(symbolizer_allocator_) Addr2LinePool(addr2line_path, &symbolizer_allocator_); } + } else if (SANITIZER_MAC) { + const char *atos_path = FindPathToBinary("atos"); + if (atos_path) { + if (common_flags()->verbosity >= 2) + Report("Using atos at path: %s\n", atos_path); + external_symbolizer = + new(symbolizer_allocator_) AtosSymbolizer(atos_path); + } } } } } - return new(symbolizer_allocator_) POSIXSymbolizer( - external_symbolizer, internal_symbolizer, libbacktrace_symbolizer); + return new (symbolizer_allocator_) + POSIXSymbolizer(external_symbolizer, internal_symbolizer, + libbacktrace_symbolizer, dladdr_symbolizer); } } // namespace __sanitizer Index: test/asan/TestCases/Darwin/sandbox-symbolizer.cc =================================================================== --- test/asan/TestCases/Darwin/sandbox-symbolizer.cc +++ test/asan/TestCases/Darwin/sandbox-symbolizer.cc @@ -0,0 +1,23 @@ +// In a non-forking sandbox, we can't spawn an external symbolizer, but dladdr() +// should still work and provide function names. No line numbers though. 
+ +// RUN: %clangxx_asan -O0 %s -o %t && not sandbox-exec -p '(version 1)(allow default)(deny process-fork)' %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O1 %s -o %t && not sandbox-exec -p '(version 1)(allow default)(deny process-fork)' %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O2 %s -o %t && not sandbox-exec -p '(version 1)(allow default)(deny process-fork)' %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_asan -O3 %s -o %t && not sandbox-exec -p '(version 1)(allow default)(deny process-fork)' %run %t 2>&1 | FileCheck %s + +#include +int main() { + char *x = (char*)malloc(10 * sizeof(char)); + free(x); + return x[5]; + // CHECK: {{.*ERROR: AddressSanitizer: heap-use-after-free on address}} + // CHECK: {{READ of size 1 at 0x.* thread T0}} + // CHECK: {{ #0 0x.* in main}} + // CHECK: {{freed by thread T0 here:}} + // CHECK: {{ #0 0x.* in wrap_free}} + // CHECK: {{ #1 0x.* in main}} + // CHECK: {{previously allocated by thread T0 here:}} + // CHECK: {{ #0 0x.* in wrap_malloc}} + // CHECK: {{ #1 0x.* in main}} +} Index: test/asan/TestCases/Darwin/suppressions-sandbox.cc =================================================================== --- test/asan/TestCases/Darwin/suppressions-sandbox.cc +++ test/asan/TestCases/Darwin/suppressions-sandbox.cc @@ -0,0 +1,26 @@ +// Check that without suppressions, we catch the issue. 
+// RUN: %clangxx_asan -O0 %s -o %t -framework Foundation +// RUN: not %run %t 2>&1 | FileCheck --check-prefix=CHECK-CRASH %s + +// Check that suppressing a function name works within a no-fork sandbox +// RUN: echo "interceptor_via_fun:CFStringCreateWithBytes" > %t.supp +// RUN: ASAN_OPTIONS=suppressions=%t.supp \ +// RUN: sandbox-exec -p '(version 1)(allow default)(deny process-fork)' \ +// RUN: %run %t 2>&1 | FileCheck --check-prefix=CHECK-IGNORE %s + +#include + +int main() { + char *a = (char *)malloc(6); + strcpy(a, "hello"); + CFStringRef str = + CFStringCreateWithBytes(kCFAllocatorDefault, (unsigned char *)a, 10, + kCFStringEncodingUTF8, FALSE); // BOOM + fprintf(stderr, "Ignored.\n"); + free(a); +} + +// CHECK-CRASH: AddressSanitizer: heap-buffer-overflow +// CHECK-CRASH-NOT: Ignored. +// CHECK-IGNORE-NOT: AddressSanitizer: heap-buffer-overflow +// CHECK-IGNORE: Ignored. Index: test/asan/TestCases/closed-fds.cc =================================================================== --- test/asan/TestCases/closed-fds.cc +++ test/asan/TestCases/closed-fds.cc @@ -0,0 +1,31 @@ +// Check that when the program closed its std(in|out|err), running the external +// symbolizer still works. + +// RUN: rm -f %t.log.* +// RUN: %clangxx_asan -O0 %s -o %t 2>&1 && ASAN_OPTIONS=log_path=%t.log:verbosity=2 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT +// RUN: FileCheck %s --check-prefix=CHECK-FILE < %t.log.* + +#include +#include +#include +#include + +int main(int argc, char **argv) { + fprintf(stderr, "Closing streams.\n"); + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); + fprintf(stderr, "Can you hear me now?\n"); + char *x = (char*)malloc(10 * sizeof(char)); + free(x); + x[argc] = 'X'; // BOOM + return 0; +} + +// CHECK-OUTPUT: Closing streams. +// CHECK-OUTPUT-NOT: Can you hear me now? 
+// CHECK-FILE: {{.*ERROR: AddressSanitizer: heap-use-after-free on address}} +// CHECK-FILE: {{0x.* at pc 0x.* bp 0x.* sp 0x.*}} +// CHECK-FILE: {{WRITE of size 1 at 0x.* thread T0}} +// CHECK-FILE: Using llvm-symbolizer at path: {{.*}} +// CHECK-FILE: {{ #0 0x.* in main .*closed-fds.cc:}}[[@LINE-10]]