Index: lib/sanitizer_common/sanitizer_linux.h =================================================================== --- lib/sanitizer_common/sanitizer_linux.h +++ lib/sanitizer_common/sanitizer_linux.h @@ -76,9 +76,16 @@ public: explicit ThreadLister(pid_t pid); ~ThreadLister(); - bool ListThreads(InternalMmapVector *threads); + enum Result { + Error, + Incomplete, + Ok, + }; + Result ListThreads(InternalMmapVector *threads); private: + bool IsAlive(int tid); + pid_t pid_; int descriptor_ = -1; InternalMmapVector buffer_; Index: lib/sanitizer_common/sanitizer_linux.cc =================================================================== --- lib/sanitizer_common/sanitizer_linux.cc +++ lib/sanitizer_common/sanitizer_linux.cc @@ -915,30 +915,75 @@ } } -bool ThreadLister::ListThreads(InternalMmapVector *threads) { +ThreadLister::Result ThreadLister::ListThreads( + InternalMmapVector *threads) { if (internal_iserror(descriptor_)) - return false; + return Error; internal_lseek(descriptor_, 0, SEEK_SET); threads->clear(); - while (uptr read = internal_getdents(descriptor_, - (struct linux_dirent *)buffer_.data(), - buffer_.size())) { + Result result = Ok; + for (bool first_read = true;; first_read = false) { + // Resize to max capacity if it was downsized by IsAlive. + buffer_.resize(buffer_.capacity()); + CHECK_GE(buffer_.size(), 4096); + uptr read = internal_getdents( + descriptor_, (struct linux_dirent *)buffer_.data(), buffer_.size()); + if (!read) + return result; if (internal_iserror(read)) { Report("Can't read directory entries from /proc/%d/task.\n", pid_); - return false; + return Error; } for (uptr begin = (uptr)buffer_.data(), end = begin + read; begin < end;) { struct linux_dirent *entry = (struct linux_dirent *)begin; begin += entry->d_reclen; - if (entry->d_ino && *entry->d_name >= '0' && *entry->d_name <= '9') { - // Found a valid tid. - threads->push_back(internal_atoll(entry->d_name)); + if (entry->d_ino == 1) { + // Inode 1 is for bad blocks and also can be a reason for early return. + // Should be emitted if kernel tried to output terminating thread. + // See proc_task_readdir implementation in Linux. + result = Incomplete; } + if (entry->d_ino && *entry->d_name >= '0' && *entry->d_name <= '9') + threads->push_back(internal_atoll(entry->d_name)); + } + + // Now we are going to detect short-read or early EOF. In such cases Linux + // can return inconsistent list with missing alive threads. + // Code will just remember that the list is possible incomplete but it will + // continue reads to return as much as possible. + if (!first_read) { + // The first one was a short-read by definition. + result = Incomplete; + } else if (read > buffer_.size() - 1024) { + // Read was close to the buffer size. So double the size and assume the + // worst. + buffer_.resize(buffer_.size() * 2); + result = Incomplete; + } else if (!threads->empty() && !IsAlive(threads->back())) { + // Maybe Linux early returned from read on terminated thread (!pid_alive) + // and failed to restore read position. + // See next_tid and proc_task_instantiate in Linux. + result = Incomplete; } } - return true; +} + +bool ThreadLister::IsAlive(int tid) { + // /proc/%d/task/%d/status uses same call to detect alive threads as + // proc_task_readdir. See task_state implementation in Linux. + char path[80]; + internal_snprintf(path, sizeof(path), "/proc/%d/task/%d/status", pid_, tid); + if (!ReadFileToVector(path, &buffer_) || buffer_.empty()) + return false; + buffer_.push_back(0); + static const char kPrefix[] = "\nPPid:"; + const char *field = internal_strstr(buffer_.data(), kPrefix); + if (!field) + return false; + field += internal_strlen(kPrefix); + return (int)internal_atoll(field) != 0; } ThreadLister::~ThreadLister() { Index: lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc =================================================================== --- lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc +++ lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc @@ -210,25 +210,23 @@ bool ThreadSuspender::SuspendAllThreads() { ThreadLister thread_lister(pid_); bool added_threads; - bool first_iteration = true; InternalMmapVector threads; threads.reserve(128); do { - // Run through the directory entries once. added_threads = false; - if (!thread_lister.ListThreads(&threads)) { - ResumeAllThreads(); - return false; + switch (thread_lister.ListThreads(&threads)) { + case ThreadLister::Error: + ResumeAllThreads(); + return false; + case ThreadLister::Incomplete: + added_threads = true; + break; + case ThreadLister::Ok: + break; } for (tid_t tid : threads) if (SuspendThread(tid)) added_threads = true; - if (first_iteration && !added_threads) { - // Detach threads and fail. - ResumeAllThreads(); - return false; - } - first_iteration = false; } while (added_threads); return true; }