Index: test/asan/TestCases/Darwin/zone_enumeration/in_process/simple_enumeration.cc =================================================================== --- /dev/null +++ test/asan/TestCases/Darwin/zone_enumeration/in_process/simple_enumeration.cc @@ -0,0 +1,153 @@ +// RUN: %clangxx_asan %s -o %t +// RUN: %run %t + +// This simple test tries to perform in-process malloc zone enumeration. +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const int sleep_time = 5000; + +// This is a hack to gather the known allocations in this process. We don't +// use a std::unordered_map here because that would trigger calls to the +// allocator inside the hook which we want to avoid. +static const size_t kMaxAllocations = 50000; +uintptr_t seen_malloc_addrs[kMaxAllocations]; +size_t seen_malloc_sizes[kMaxAllocations]; +uintptr_t seen_free_addrs[kMaxAllocations]; +bool hook_enabled = true; +uint32_t current_malloc_index = 0; +uint32_t current_free_index = 0; +extern "C" void __sanitizer_malloc_hook(const volatile void *ptr, size_t size) { + if (!hook_enabled) + return; + if (current_malloc_index >= kMaxAllocations) { + // Can't record any more allocations + fprintf(stderr, "WARNING: Can't record any more allocations\n"); + return; + } + seen_malloc_addrs[current_malloc_index] = reinterpret_cast(ptr); + seen_malloc_sizes[current_malloc_index] = size; + ++current_malloc_index; +} +extern "C" void __sanitizer_free_hook(const volatile void *ptr) { + if (!hook_enabled) + return; + if (current_free_index >= kMaxAllocations) { + // Can't record any more frees + fprintf(stderr, "WARNING: Can't record any more frees\n"); + return; + } + seen_free_addrs[current_free_index] = reinterpret_cast(ptr); + ++current_free_index; +} + +// Hack to record malloc zone allocations. We don't use +// a `std::unordered_map` here because during enumeration +// the allocator will be locked and `std::unordered_map` might call malloc. 
+uintptr_t mz_recorded_allocation_addrs[kMaxAllocations];
+size_t mz_recorded_allocation_sizes[kMaxAllocations];
+uintptr_t current_mz_record_index = 0;
+
+int main(int argc, char **argv) {
+  printf("Started demo ASan application\n");
+
+  // Deliberately perform some allocations that we will check for later.
+  void *a = malloc(512); // Should be served by primary size class allocator
+  void *b =
+      malloc(1024 * 1024); // Should be served by secondary mmap based allocator
+  void *c = malloc(1);
+
+  // Stop recording allocations using the malloc hooks. Operating on
+  // `known_allocations` might trigger malloc/free so we don't want
+  // to record these.
+  hook_enabled = false;
+  std::unordered_map<uintptr_t, size_t> known_allocations;
+  // Populate map with observed mallocs
+  for (unsigned index = 0; index < current_malloc_index; ++index) {
+    known_allocations.insert(
+        {seen_malloc_addrs[index], seen_malloc_sizes[index]});
+  }
+  // Remove freed allocations
+  for (unsigned index = 0; index < current_free_index; ++index) {
+    known_allocations.erase(seen_free_addrs[index]);
+  }
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(a)) !=
+         known_allocations.end());
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(b)) !=
+         known_allocations.end());
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(c)) !=
+         known_allocations.end());
+  if (known_allocations.size() < 3) {
+    fprintf(stderr, "known_allocations is too small\n");
+    return 1;
+  }
+
+  // Now perform enumeration via malloc zone.
+ malloc_zone_t *mz = malloc_default_zone(); + malloc_introspection_t *mi = mz->introspect; + auto reader_fn = [](task_t remote_task, vm_address_t remote_address, + vm_size_t, void **local_memory) -> kern_return_t { + // Local read so nothing to do + assert(remote_task == mach_task_self()); + *local_memory = reinterpret_cast(remote_address); + return KERN_SUCCESS; + }; + auto recorder_fn = [](task_t remote_task, void *context, unsigned type, + vm_range_t *beg, unsigned num) -> void { + assert(remote_task == mach_task_self()); + assert(type == MALLOC_PTR_IN_USE_RANGE_TYPE); + assert(num > 0); + for (int index = 0; index < num; ++index) { + vm_range_t entry = beg[index]; + mz_recorded_allocation_addrs[current_mz_record_index] = entry.address; + mz_recorded_allocation_sizes[current_mz_record_index] = entry.size; + ++current_mz_record_index; + } + }; + kern_return_t result = + mi->enumerator(/*task=*/mach_task_self(), /*context=*/nullptr, + /*type_mask=*/MALLOC_PTR_IN_USE_RANGE_TYPE, + /*zone_address=*/reinterpret_cast(mz), + /*reader=*/reader_fn, + /*recorder=*/recorder_fn); + if (result != KERN_SUCCESS) { + fprintf(stderr, "Failed to enumerate zone: %s\n", + mach_error_string(result)); + return 1; + } + std::unordered_map enumerated_allocations; + // Populate map with enumerated live allocations + for (unsigned index = 0; index < current_mz_record_index; ++index) { + auto pair = + enumerated_allocations.insert({mz_recorded_allocation_addrs[index], + mz_recorded_allocation_sizes[index]}); + assert(pair.second && "key already inserted"); + } + assert(enumerated_allocations.size() > 0); + // Now make sure that every allocation in `known_allocation` was enumerated + for (const auto &pair : known_allocations) { + auto addr = pair.first; + auto size = pair.second; + auto it = enumerated_allocations.find(addr); + if (it == enumerated_allocations.end()) { + fprintf(stderr, "Failed to find %p\n", reinterpret_cast(addr)); + return 1; + } + if (size != it->second) { + 
fprintf(stderr, "size mismatch %lu != %lu\n", size, it->second);
+      return 1;
+    }
+    printf("Found %p (size %lu) from `known_allocations` in "
+           "`enumerated_allocations`\n",
+           reinterpret_cast<void *>(addr), size);
+  }
+  printf("DONE\n");
+  return 0;
+}
Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/analysis_prog.cc
===================================================================
--- /dev/null
+++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/analysis_prog.cc
@@ -0,0 +1,734 @@
+// This process is the "analysis" portion of the test.
+//
+// The test:
+// * Launches an ASan target process.
+// * Waits for the target process to send the address of its malloc zone
+//   and a list of allocations it knows about.
+// * Freezes the target process.
+// * Loads the ASan dylib and calls the malloc enumeration function,
+//   telling it to examine the ASan target process.
+// * Compares the allocations reported by the malloc enumeration function
+//   against the allocation list that was sent by the ASan target process.
+// +#include "protocol.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" char **environ; + +struct ScopedPosixSpawnFileActions { + typedef posix_spawn_file_actions_t dataTy; + dataTy actions; + ScopedPosixSpawnFileActions() { + int failure = posix_spawn_file_actions_init(&actions); + assert(failure == 0); + } + ~ScopedPosixSpawnFileActions() { + int failure = posix_spawn_file_actions_destroy(&actions); + assert(failure == 0); + } + void add_close(int fd) { + int failure = posix_spawn_file_actions_addclose(&actions, fd); + assert(failure == 0); + } +}; + +void usage_message(const char *name) { + printf("%s \n", name); + exit(1); +} + +kern_return_t simple_memory_reader(task_t remote_task, + vm_address_t remote_address, vm_size_t size, + void **local_memory); + +void vm_range_recorder(task_t, void *context, unsigned type, + vm_range_t *range_begin, unsigned num_ranges); + +struct Range { + uintptr_t start; + size_t size; + Range(uintptr_t start, size_t size) : start(start), size(size) {} + bool operator==(const Range &other) const { + return start == other.start && size == other.size; + } + uintptr_t end_byte() const { return (start + size - 1); } + + bool overlaps(const Range &other) const { + // This starts before other and this's + // tail overlaps other + if (start < other.start) { + if (end_byte() >= other.start) { + return true; + } + } + + // This starts after other and other's + // tail overlaps this. 
+ if (start > other.start) { + if (other.end_byte() >= start) { + return true; + } + } + if (*this == other) { + // The same ranges overlap + return true; + } + return false; + } + + bool contains(const Range &other) const { + if (*this == other) + return true; + + if (start <= other.start) { + if (end_byte() >= other.end_byte()) { + return true; + } + } + return false; + } +}; + +struct VMRegionCache { + struct RangeCompare { + bool operator()(const Range &a, const Range &b) const { + if (a.overlaps(b)) { + // Ranges that overlap are consider incomparable. + // This should mean that when do a search for a range + // than contained within a stored range then it will + // be returned. + return false; + } + // If there's no overlap then order by the start address. + return a.start < b.start; + } + }; + using RegionMap = std::multimap; + +private: + // Maps VM regions in target process to VM regions in local process. + RegionMap regions; + +public: + VMRegionCache() {} + + ~VMRegionCache() { + // Free regions + for (const auto ®ion_pair : regions) { + int result = munmap(reinterpret_cast(region_pair.second), + region_pair.first.size); + assert(result == 0); + } + } + // Don't copy by accident. 
+ VMRegionCache(const VMRegionCache &) = delete; + VMRegionCache(VMRegionCache &&) = delete; + VMRegionCache &operator=(const VMRegionCache &) = delete; + VMRegionCache &operator=(VMRegionCache &&) = delete; + + std::pair + find_vm_regions_for(Range &r) { + return regions.equal_range(r); + } + + RegionMap::iterator insert_vm_region(Range &remote_region_range, + uintptr_t local_region_start) { + auto it = regions.insert({remote_region_range, local_region_start}); + return it; + } +}; + +void custom_signal_handler(int sig_num, siginfo_t *info, void *uap); + +struct Analyzer { + std::string dylib_path; + std::string target_program; + pid_t pid_for_target = 0; + task_t task_for_target; + task_suspension_token_t task_suspension_token; + int pipe_fds[2]; + int read_fd() { return pipe_fds[0]; } + int write_fd() { return pipe_fds[1]; } + bool target_is_frozen = false; + bool failure = false; + size_t page_size; + bool print_enumerator_allocations = false; + VMRegionCache vm_cache; + + // From target + uintptr_t target_zone_addr = 0; + std::unordered_map known_allocations; + + // From enumerator + std::unordered_map enumerator_reported_allocations; + + // From ASan dylib + malloc_introspection_t *dylib_introspection_data; + // Function pointer to malloc zone enumerator in ASan dylib + kern_return_t (*enumerator)(task_t task, void *, unsigned type_mask, + vm_address_t zone_address, memory_reader_t reader, + vm_range_recorder_t recorder); + + Analyzer(const char *dylib_path, const char *program_name) { + this->dylib_path = dylib_path; + this->target_program = program_name; + this->page_size = getpagesize(); + if (getenv("ASAN_LEAKS_DEBUG") != 0) { + print_enumerator_allocations = true; + } + } + + bool target_started() { return pid_for_target != 0; } + +#ifdef __LP64__ +#define IF_64_ELSE(VALUE_64, VALUE_ELSE) VALUE_64 +#else +#define IF_64_ELSE(VALUE_64, VALUE_ELSE) VALUE_ELSE +#endif + + bool record_range_from_enumerator(vm_range_t range) { + auto pair = + 
enumerator_reported_allocations.insert({range.address, range.size}); + if (!pair.second) { + log_failure("Attempted to insert range (address:%p, size : % p) which is " + "already stored\n", + range.address, range.size); + } + if (print_enumerator_allocations) { + printf("Target allocation at %p with size %" IF_64_ELSE("lu", + "u") " bytes\n", + reinterpret_cast(range.address), range.size); + } + return pair.second; + } + + void log_failure(const char *format_str, ...) { + va_list args; + va_start(args, format_str); + vfprintf(stderr, format_str, args); + failure = true; + va_end(args); + } + + bool failed() const { return failure; } + + uintptr_t round_down_to_page_addr(uintptr_t addr) { + return (addr / page_size) * page_size; + } + + uintptr_t get_page_number(uintptr_t addr) { return addr / page_size; } + + bool launch_target() { + if (target_started()) + return false; + int failure = 0; + + // Set up pipe for communication from child. + // After spawn, parent needs to close write end + // and child needs to close read end. + failure = pipe(pipe_fds); + if (failure != 0) { + log_failure("Failed to create pipe.\n"); + perror(""); + return false; + } + + // Make child close the read end of pipe + ScopedPosixSpawnFileActions file_actions; + file_actions.add_close(read_fd()); + + std::string fd_for_child = std::to_string(write_fd()); + char *const argv[3] = {const_cast(target_program.c_str()), + const_cast(fd_for_child.c_str()), nullptr}; + + // It's necessary to pass `environ` so that in the iOS simulator + // the child process is also launced as a simulator process. + failure = + posix_spawn(&pid_for_target, target_program.c_str(), + /*file_actions=*/&(file_actions.actions), /*attrp=*/nullptr, + /*argv=*/argv, /*envp=*/environ); + if (failure != 0) { + log_failure("Failed to launch %s due to \n", target_program.c_str()); + perror(""); + return false; + } + // Close the write end of the pipe in the parent. 
+ failure = close(write_fd()); + assert(failure == 0); + + printf("Launched %s with PID: %d\n", target_program.c_str(), + pid_for_target); + + // Set up a custom signal handler so that if we crash + // we will kill the child process first. + struct sigaction new_action; + sigemptyset(&(new_action.sa_mask)); + new_action.sa_flags = SA_SIGINFO; + new_action.sa_sigaction = custom_signal_handler; + int signals_to_handle[] = {SIGINT, SIGQUIT, SIGABRT, + SIGBUS, SIGSEGV, SIGFPE}; + for (int index = 0; + index < (sizeof(signals_to_handle) / sizeof(signals_to_handle[0])); + ++index) { + int signal_to_handle = signals_to_handle[index]; + printf("Setting up signal handler for signal %d\n", signal_to_handle); + failure = sigaction(signal_to_handle, &new_action, /*oact=*/nullptr); + if (failure != 0) { + log_failure("Failed to set up signal handler for signal %d\n", + signal_to_handle); + halt_target(); + return false; + } + } + + // Get task port for child process + kern_return_t err = task_for_pid(/*target_tport=*/mach_task_self(), + /*pid=*/pid_for_target, + /*t=*/&task_for_target); + if (err != KERN_SUCCESS) { + log_failure("Failed call to task_for_pid with err: %s\n", + mach_error_string(err)); + halt_target(); + return false; + } + printf("Got task port for PID %d\n", pid_for_target); + return true; + } + + bool halt_target() { + assert(target_started()); + printf("Halting PID %d\n", pid_for_target); + kill(pid_for_target, SIGTERM); + sleep(2); + kill(pid_for_target, SIGKILL); + int status = 0; + pid_t pid = waitpid(pid_for_target, /*stat_loc=*/&status, /*options=*/0); + if (pid != pid_for_target) { + perror("Failed waiting for target process to halt"); + return false; + } + if (!WIFSIGNALED(status)) { + log_failure("Expected process to be killed by a signal\n"); + } else { + // Should be SIGTERM or SIGKILL + auto exit_signal = WTERMSIG(status); + if (exit_signal != SIGTERM && exit_signal != SIGKILL) { + log_failure("Target died with unexpected signal: %d\n", 
exit_signal); + } + } + target_is_frozen = false; + pid_for_target = 0; + return true; + } + + bool freeze_target() { + if (target_is_frozen) + return true; + auto error = task_suspend2(task_for_target, &task_suspension_token); + if (error != KERN_SUCCESS) { + log_failure("Failed to freeze process: %s\n", mach_error_string(error)); + return false; + } + target_is_frozen = true; + return true; + } + + bool unfreeze_target() { + if (!target_is_frozen) { + return true; + } + auto error = task_resume2(task_suspension_token); + if (error != KERN_SUCCESS) { + log_failure("Failed to unfreeze process: %s\n", mach_error_string(error)); + return false; + } + target_is_frozen = false; + return true; + } + + bool read_payload() { + if (!target_started()) { + return false; + } + + PayloadHolder ph; + bool success = ph.readFromFd(read_fd()); + if (!success) { + log_failure("Failed to read payload\n"); + return false; + } + + // Read payload. + // We expect the child to tell us the address of its + // malloc zone (which we assume is an ASan malloc zone) + // followed by a list of allocations that the child + // knows it has. + assert(ph.size() >= 1); + bool hit_first = false; + for (auto &payload : ph) { + if (!hit_first) { + target_zone_addr = payload.ptr; + hit_first = true; + continue; + } + auto pair = known_allocations.insert({payload.ptr, payload.size}); + assert(pair.second); // Insertion should have happened + } + printf("Target process malloc zone is at 0x%lx" PRIxMAX "\n", + target_zone_addr); + printf("Target process reported the following allocations:\n"); + for (auto &pair : known_allocations) { + printf("Target process ptr 0x%lx size: %lu\n", pair.first, pair.second); + } + return true; + } + + typedef malloc_zone_t *(*get_default_zone_fn_ptr_ty)(void); + + bool load_asan_dylib() { + // We have to set an environment variable to tell the ASan dylib to + // do init differently. 
+ int success = setenv("APPLE_ASAN_INIT_FOR_DLOPEN", "1", /*overwrite=*/1); + if (success != 0) { + perror("Failed to set env"); + return false; + } + void *dlh = dlopen(dylib_path.c_str(), RTLD_LAZY); + if (!dlh) { + log_failure("dlopen of %s failed: %s\n", dylib_path.c_str(), + strerror(errno)); + return false; + } + + // Try to find and call `__sanitizer_mz_default_zone()` function + // which will tell us where ASan's malloc zone lives in the dylib. + void *fn_ptr = dlsym(dlh, "__sanitizer_mz_default_zone"); + if (fn_ptr == nullptr) { + log_failure("dlsym failed:"); + if (const char *error_msg = dlerror()) { + log_failure("%s", error_msg); + } + log_failure("\n"); + return false; + } + auto get_default_zone = + reinterpret_cast(fn_ptr); + malloc_zone_t *asan_dylib_zone = get_default_zone(); + printf("Found ASan malloc zone in asan dylib at:%p\n", asan_dylib_zone); + enumerator = asan_dylib_zone->introspect->enumerator; + printf("Found ASan malloc zone enumerator in asan dylib at:%p\n", + enumerator); + // Unset so that child processes don't also disable ASan. + success = unsetenv("APPLE_ASAN_INIT_FOR_DLOPEN"); + if (success != 0) { + perror("Failed to set env"); + return false; + } + return true; + } + + bool sanity_check_result() { + printf("%lu allocations were found in the target\n", + enumerator_reported_allocations.size()); + + // We should have at least one allocation reported by the enumerator. 
+ if (enumerator_reported_allocations.size() < 1) { + log_failure("Zero allocations found in target\n"); + return false; + } + + // known_allocations should be a subset of enumerator_reported_allocations + assert(known_allocations.size() > 0); + for (const auto &allocation_pair : known_allocations) { + auto addr = allocation_pair.first; + auto size = allocation_pair.second; + auto it_in_enumerator_reported_allocations = + enumerator_reported_allocations.find(addr); + if (it_in_enumerator_reported_allocations == + enumerator_reported_allocations.end()) { + log_failure("A 'known allocation' (%p with size %" PRIdMAX + " ) was not reported by the enumerator\n", + addr, size); + return false; + } + size_t enumerator_reported_size = + it_in_enumerator_reported_allocations->second; + if (enumerator_reported_size != size) { + log_failure( + "Allocation size mismatch. The 'known allocation' was %" PRIdMAX + " bytes but the enumerator reported %" PRIdMAX " bytes\n", + size, enumerator_reported_size); + return false; + } + } + printf("All known %lu known allocations were reported by the enumerator\n ", + known_allocations.size()); + + // There should be no zero sized allocations or allocations in the zero-th + // page. + for (const auto &allocation_pair : enumerator_reported_allocations) { + auto addr = allocation_pair.first; + auto size = allocation_pair.second; + if (get_page_number(addr) == 0) { + log_failure("Enumerator reported allocation in zero-th page\n"); + return false; + } + if (size == 0) { + log_failure("Enumerator reported zero size allocation\n"); + return false; + } + } + + // enumerator_reported_allocations should contain no overlapping + // allocations. 
O(N^2) + for (const auto &allocation_pair : enumerator_reported_allocations) { + for (const auto &other_allocation_pair : + enumerator_reported_allocations) { + Range first_range(allocation_pair.first, allocation_pair.second); + Range second_range(other_allocation_pair.first, + other_allocation_pair.second); + if (first_range == second_range) { + // Skip the same element + continue; + } + if (first_range.overlaps(second_range)) { + log_failure("Detected overlap\n"); + return false; + } + } + } + return true; + } + + bool analyze() { + printf("Starting analysis in PID: %d\n", getpid()); + if (!load_asan_dylib()) { + return false; + } + + if (!launch_target()) { + return false; + } + bool success; + + success = read_payload(); + if (!success) { + return false; + } + + // Freeze process at this point + success = freeze_target(); + if (!success) { + halt_target(); + return false; + } + + // Call the enumerator making sure we see the known allocations + // that we were told about by the child. + // + // We ask for all range types so we can check we only report + // MALLOC_PTR_IN_USE_RANGE_TYPE regions. 
+ assert(!failed()); + assert(enumerator_reported_allocations.size() == 0); + auto error = + enumerator(task_for_target, (void *)this, + MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE | + MALLOC_ADMIN_REGION_RANGE_TYPE, + target_zone_addr, simple_memory_reader, vm_range_recorder); + if (error != KERN_SUCCESS) { + log_failure("Failed to invoke enumerator: %s\n", + mach_error_string(error)); + halt_target(); + return false; + } + if (failed()) { + log_failure("Detected failures\n"); + halt_target(); + return false; + } + + success = sanity_check_result(); + if (!success) { + log_failure("Sanity check failed\n"); + halt_target(); + return false; + } + + // Unfreeze the process + success = unfreeze_target(); + if (!success) { + halt_target(); + return false; + } + + halt_target(); + return true; + } + + kern_return_t map_memory(task_t remote_task, uintptr_t first_page_addr, + size_t num_pages, uintptr_t &local_address, + bool read_only = true) { + assert(num_pages > 0); + mach_msg_type_number_t mapped_size = 0; // size in bytes + vm_offset_t ptr_to_first_local_page = 0; + auto error = mach_vm_read( + /*target_task=*/remote_task, + /*address=*/first_page_addr, + /*size=*/num_pages * page_size, + /*data=*/&ptr_to_first_local_page, + /*dataCnt=*/&mapped_size); + if (error != KERN_SUCCESS) { + log_failure("Failed mach_vm_read: %s\n", mach_error_string(error)); + return KERN_FAILURE; + } + assert(mapped_size == (num_pages * page_size)); + static_assert(sizeof(local_address) == sizeof(ptr_to_first_local_page), + "size mismatch"); + local_address = ptr_to_first_local_page; + int page_flags = PROT_READ; + if (!read_only) { + page_flags |= PROT_WRITE; + } + error = mprotect(reinterpret_cast(ptr_to_first_local_page), + page_size * num_pages, page_flags); + if (error != 0) { + log_failure("Failed to mprotect() allocated pages starting at %p: %s", + ptr_to_first_local_page, strerror(errno)); + return KERN_FAILURE; + } + return KERN_SUCCESS; + } + + kern_return_t 
memory_reader(task_t remote_task, vm_address_t remote_address, + vm_size_t size, void **local_memory) { + // Create region to represent the memory we want + auto object_range = Range(remote_address, size); + + // Calculate the first page we want and how many we need + uintptr_t first_page_addr = round_down_to_page_addr(remote_address); + uintptr_t last_byte_addr = remote_address + size - 1; + assert(last_byte_addr >= first_page_addr); + size_t num_pages = + (get_page_number(last_byte_addr) - get_page_number(first_page_addr)) + + 1; + assert(num_pages > 0); + size_t read_offset_from_first_page = remote_address - first_page_addr; + assert(read_offset_from_first_page < page_size); + uintptr_t ptr_to_first_local_page = 0; + + // See if already have any overlapping pages already cached. + Range range_for_vm_region = Range(0, 0); + auto matching_regions = vm_cache.find_vm_regions_for(object_range); + for (auto it = matching_regions.first, ie = matching_regions.second; + it != ie; ++it) { + // See if Region fully encompasses object + if (it->first.contains(object_range)) { + range_for_vm_region = it->first; + ptr_to_first_local_page = it->second; + break; + } + } + if (range_for_vm_region.start == 0) { + // Didn't find a cached VM region. Allocate a new one + range_for_vm_region = Range(first_page_addr, num_pages * page_size); + auto success = map_memory(remote_task, first_page_addr, num_pages, + ptr_to_first_local_page, /*read_only=*/false); + if (success != KERN_SUCCESS) { + *local_memory = 0; + return success; + } + assert(ptr_to_first_local_page); + // Cache the VM region + vm_cache.insert_vm_region(range_for_vm_region, ptr_to_first_local_page); + } + // Check the object is inside the VM region we got back. 
+ assert(range_for_vm_region.contains(object_range)); + + // Compute the offset from the beginning of the VM region to the page we + // want + assert(first_page_addr >= range_for_vm_region.start); + size_t offset_from_vm_region_begin = + first_page_addr - range_for_vm_region.start; + + uintptr_t local_object_addr = ptr_to_first_local_page + + offset_from_vm_region_begin + + read_offset_from_first_page; + assert(local_memory); + *local_memory = reinterpret_cast(local_object_addr); + return KERN_SUCCESS; + } +}; + +// This is global so that `simple_memory_reader` can get +// at the analyzer. +std::unique_ptr globalAnalyzer; + +void custom_signal_handler(int sig_num, siginfo_t *info, void *uap) { + printf("Received signal: %d\n", sig_num); + if (globalAnalyzer) { + // We need to stop the target process if we're about to crash + // otherwise we'll leave a zombie process lying around. + printf("Trying to halt target process\n"); + globalAnalyzer->halt_target(); + } + printf("Exiting with failure.\n"); + exit(1); +} + +kern_return_t simple_memory_reader(task_t remote_task, + vm_address_t remote_address, vm_size_t size, + void **local_memory) { + return globalAnalyzer->memory_reader(remote_task, remote_address, size, + local_memory); +} + +void vm_range_recorder(task_t, void *context, unsigned type, + vm_range_t *range_begin, unsigned num_ranges) { + auto analyzer = reinterpret_cast(context); + if (type != MALLOC_PTR_IN_USE_RANGE_TYPE) { + // We should only see MALLOC_PTR_IN_USE_RANGE_TYPE ranges. 
+ analyzer->log_failure("Saw unsupported range type %d\n", type); + return; + } + if (num_ranges == 0) { + analyzer->log_failure("num_ranges cannot be 0\n"); + return; + } + for (unsigned index = 0; index < num_ranges; ++index) { + vm_range_t range = range_begin[index]; + analyzer->record_range_from_enumerator(range); + } +} + +int main(int argc, char **argv) { + if (argc != 3) { + usage_message(argv[0]); + } + globalAnalyzer.reset(new Analyzer(argv[1], argv[2])); + bool success = globalAnalyzer->analyze(); + if (!success) { + fprintf(stderr, "Failure\n"); + } + return !success; +} Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/analyzer_entitlements.plist =================================================================== --- /dev/null +++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/analyzer_entitlements.plist @@ -0,0 +1,11 @@ + + + + + com.apple.security.cs.debugger + + + Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/asan_prog.cc =================================================================== --- /dev/null +++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/asan_prog.cc @@ -0,0 +1,155 @@ +// This process is the "target" porition of the test. +// +// This program is built with ASan and is the program +// that will be examined by the "analysis" process. +// +// It detects (a reasonable subset of) live allocations in its self and finds +// the address of its malloc zone and sends them to the "analysis" process and +// then sleeps, awaiting examination by the "analysis" process. +#include "protocol.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static const int sleep_time = 5000; + +// This is a hack to gather the known allocations in this process. We don't +// use a std::unordered_map here because that would trigger calls to the +// allocator inside the hook which we want to avoid. 
+static const size_t kMaxAllocations = 50000; +uintptr_t seen_malloc_addrs[kMaxAllocations]; +size_t seen_malloc_sizes[kMaxAllocations]; +uintptr_t seen_free_addrs[kMaxAllocations]; +bool hook_enabled = true; +uint32_t current_malloc_index = 0; +uint32_t current_free_index = 0; +extern "C" void __sanitizer_malloc_hook(const volatile void *ptr, size_t size) { + if (!hook_enabled) + return; + if (current_malloc_index >= kMaxAllocations) { + // Can't record any more allocations + fprintf(stderr, "WARNING: Can't record any more allocations\n"); + return; + } + seen_malloc_addrs[current_malloc_index] = reinterpret_cast(ptr); + seen_malloc_sizes[current_malloc_index] = size; + ++current_malloc_index; +} +extern "C" void __sanitizer_free_hook(const volatile void *ptr) { + if (!hook_enabled) + return; + if (current_free_index >= kMaxAllocations) { + // Can't record any more frees + fprintf(stderr, "WARNING: Can't record any more frees\n"); + return; + } + seen_free_addrs[current_free_index] = reinterpret_cast(ptr); + ++current_free_index; +} + +int main(int argc, char **argv) { + if (argc != 2) { + fprintf(stderr, "Must have 1 argument"); + return 1; + } + printf("Started demo ASan application\n"); + int write_fd = std::stoi(argv[1]); + printf("Write FD is %d\n", write_fd); + + // Make sure we mark this file descriptor as close-on-exec. + // If we don't then when we fork (e.g. ASan spawning a symbolizer) + // then we might cause parent to hang because `close()` being called + // in this process doesn't actually send EOF to the parent. + if (fcntl(write_fd, F_SETFD, FD_CLOEXEC) == -1) { + fprintf(stderr, "Failed to set FD_CLOEXEC on fd\n"); + return 1; + } + + PayloadHolder ph; + + // First find the ASan malloc zone + // and tell the parent about it. + auto mz = malloc_default_zone(); + printf("Malloc Zone is at: %p\n", mz); + ph.push_back({reinterpret_cast(mz), 0}); + + // Deliberately perform some allocations that we will inform the parent about. 
+  void *a = malloc(512); // Should be served by primary size class allocator
+  void *b =
+      malloc(1024 * 1024); // Should be served by secondary mmap based allocator
+  void *c = malloc(1);
+
+  // Stop recording allocations using the malloc hooks. Operating on
+  // `known_allocations` might trigger malloc/free so we don't want
+  // to record these.
+  hook_enabled = false;
+  std::unordered_map<uintptr_t, size_t> known_allocations;
+  // Populate map with observed mallocs
+  for (unsigned index = 0; index < current_malloc_index; ++index) {
+    known_allocations.insert(
+        {seen_malloc_addrs[index], seen_malloc_sizes[index]});
+  }
+  // Remove freed allocations
+  for (unsigned index = 0; index < current_free_index; ++index) {
+    known_allocations.erase(seen_free_addrs[index]);
+  }
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(a)) !=
+         known_allocations.end());
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(b)) !=
+         known_allocations.end());
+  assert(known_allocations.find(reinterpret_cast<uintptr_t>(c)) !=
+         known_allocations.end());
+  if (known_allocations.size() < 3) {
+    fprintf(stderr, "known_allocations is too small\n");
+    return 1;
+  }
+
+  // FIXME(dliew): `__asan_describe_address` spawns the symbolizer and that can
+  // lead to hangs in llvm-lit if our parent process (analysis process)
+  // crashes. This is probably due to the Darwin symbolizer code giving the
+  // stderr file descriptor to the symbolizer process which will prevent
+  // the underlying file object from being properly closed and EOF
+  // being sent to llvm-lit.
+  printf("First allocation:\n");
+  // __asan_describe_address(a);
+  printf("Second allocation:\n");
+  // __asan_describe_address(b);
+
+  // Now report the allocations to the parent
+  for (const auto &allocation : known_allocations) {
+    ph.push_back({allocation.first, allocation.second});
+  }
+
+  bool write_success = ph.writeToFd(write_fd);
+  if (!write_success) {
+    fprintf(stderr, "Failed to write to fd\n");
+    return 1;
+  }
+  // End communication with parent
+  int close_success = close(write_fd);
+  if (close_success != 0) {
+    fprintf(stderr, "Failed to close fd\n");
+  }
+
+  // Close stdout/stderr before going to sleep.
+  // If we don't do this, then if our parent crashes
+  // at this point then llvm-lit can hang because we still hold
+  // on to the stdout/stderr file descriptor it has
+  // (one end of a pipe in llvm-lit).
+  close(STDOUT_FILENO);
+  close(STDERR_FILENO);
+
+  // Sleep to give the parent time to freeze this process.
+  unsigned remaining = sleep_time;
+  unsigned taken = 0;
+  do {
+    taken = sleep(remaining);
+    remaining = (taken > remaining) ? 0 : (remaining - taken);
+  } while (remaining > 0);
+  return 0;
+}
Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/asan_prog_entitlements.plist
===================================================================
--- /dev/null
+++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/asan_prog_entitlements.plist
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>com.apple.security.get-task-allow</key>
+  <true/>
+</dict>
+</plist>
Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/enumerate_full_dylib.test
===================================================================
--- /dev/null
+++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/enumerate_full_dylib.test
@@ -0,0 +1,16 @@
+// This test performs out-of-process enumeration of an ASan process's
+// malloc zone.
+
+RUN: %clangxx -g %S/analysis_prog.cc -o %t-analyzer
+
+// FIXME: This is only needed for macos.
It works by accident for ios like devices
+// because we copy to the device during the compile and the signed binary doesn't
+// actually end up on the device.
+RUN: codesign --force --entitlements %S/analyzer_entitlements.plist -s - %t-analyzer
+
+RUN: %clangxx_asan %S/asan_prog.cc -o %t-target
+
+// FIXME: This is only needed for macos.
+RUN: codesign --force --entitlements %S/asan_prog_entitlements.plist -s - %t-target
+
+RUN: %run %t-analyzer %shared_libasan %t-target
Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/lit.local.cfg
===================================================================
--- /dev/null
+++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
Index: test/asan/TestCases/Darwin/zone_enumeration/out_of_process/protocol.h
===================================================================
--- /dev/null
+++ test/asan/TestCases/Darwin/zone_enumeration/out_of_process/protocol.h
@@ -0,0 +1,90 @@
+#ifndef TEST_PROTOCOL_H
+#define TEST_PROTOCOL_H
+#include <assert.h>
+#include <list>
+#include <sstream>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <unistd.h>
+
+struct Payload {
+  uintptr_t ptr;
+  size_t size;
+  std::string to_str() const {
+    std::stringstream ss;
+    ss << "ptr: 0x" << std::hex << ptr << " size: " << std::dec << size;
+    return ss.str();
+  }
+};
+
+class PayloadHolder {
+  using DataTy = std::list<Payload>;
+  DataTy payloads;
+  bool debug = false;
+
+public:
+  PayloadHolder() {
+    if (getenv("ASAN_LEAKS_DEBUG")) {
+      debug = true;
+    }
+  }
+  using iterator = DataTy::iterator;
+  iterator begin() { return payloads.begin(); }
+  iterator end() { return payloads.end(); }
+
+  void push_back(Payload &&v) { payloads.push_back(v); }
+  void push_back(Payload &v) { payloads.push_back(v); }
+  size_t size() const { return payloads.size(); }
+
+  bool writeToFd(int fd) const {
+    if (debug)
+      printf("Sending %lu payloads\n", payloads.size());
+    for (auto &payload : payloads) {
+      ssize_t bytes_left_to_write = sizeof(Payload);
+      size_t ptr_offset =
0;
+      do {
+        auto bytes_written =
+            write(fd, reinterpret_cast<const char *>(&payload) + ptr_offset,
+                  bytes_left_to_write);
+        if (bytes_written == -1) {
+          return false;
+        }
+        bytes_left_to_write -= bytes_written;
+        ptr_offset += bytes_written;
+      } while (bytes_left_to_write > 0);
+    }
+    if (debug)
+      printf("Finished sending payload.\n");
+    return true;
+  }
+
+  bool readFromFd(int fd) {
+    assert(payloads.size() == 0);
+    while (true) {
+      Payload temp;
+      ssize_t bytes_left_to_read = sizeof(Payload);
+      size_t ptr_offset = 0;
+      do {
+        if (debug)
+          printf("Reading %ld bytes...", bytes_left_to_read);
+        auto bytes_read =
+            read(fd,
+                 reinterpret_cast<void *>(reinterpret_cast<char *>(&temp) +
+                                          ptr_offset),
+                 bytes_left_to_read);
+        if (debug)
+          printf("%ld\n", bytes_read);
+        if (bytes_read == -1)
+          return false;
+        if (bytes_read == 0)
+          return true; // EOF
+        bytes_left_to_read -= bytes_read;
+        ptr_offset += bytes_read;
+      } while (bytes_left_to_read > 0);
+      push_back(temp);
+      if (debug)
+        printf("%lu Payload received: %s\n", payloads.size(),
+               temp.to_str().c_str());
+    }
+  }
+};
+#endif