Index: lib/esan/CMakeLists.txt =================================================================== --- lib/esan/CMakeLists.txt +++ lib/esan/CMakeLists.txt @@ -12,6 +12,7 @@ esan_flags.cpp esan_interface.cpp esan_interceptors.cpp + esan_sideline_linux.cpp cache_frag.cpp working_set.cpp working_set_posix.cpp) Index: lib/esan/esan_flags.inc =================================================================== --- lib/esan/esan_flags.inc +++ lib/esan/esan_flags.inc @@ -18,8 +18,24 @@ // ESAN_FLAG(Type, Name, DefaultValue, Description) // See COMMON_FLAG in sanitizer_flags.inc for more details. -// Cross-tool options: +//===----------------------------------------------------------------------===// +// Cross-tool options +//===----------------------------------------------------------------------===// + ESAN_FLAG(int, cache_line_size, 64, "The number of bytes in a cache line. For the working-set tool, this " "cannot be changed without also changing the compiler " "instrumentation.") + +//===----------------------------------------------------------------------===// +// Working set tool options +//===----------------------------------------------------------------------===// + +ESAN_FLAG(bool, record_snapshots, true, + "Working set tool: whether to sample snapshots during a run.") + +// Typical profiling uses a 10ms timer. Our snapshots take some work +// to scan memory so we reduce to 20ms. +// To disable samples, turn off record_snapshots. +ESAN_FLAG(int, sample_freq, 20, + "Working set tool: sampling frequency in milliseconds.") Index: lib/esan/esan_sideline.h =================================================================== --- /dev/null +++ lib/esan/esan_sideline.h @@ -0,0 +1,61 @@ +//===-- esan_sideline.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is a part of EfficiencySanitizer, a family of performance tuners. +// +// Esan sideline thread support. +//===----------------------------------------------------------------------===// + +#ifndef ESAN_SIDELINE_H +#define ESAN_SIDELINE_H + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __esan { + +typedef void (*SidelineFunc)(void *Arg); + +// Currently only one sideline thread is supported. +// It calls the SidelineFunc passed to launchThread once on each sample at the +// given frequency in real time (i.e., wall clock time). +class SidelineThread { +public: + // We cannot initialize any fields in the constructor as it will be called + // *after* launchThread for a static instance, as esan.module_ctor is called + // before static initializers. + SidelineThread() {} + ~SidelineThread() {} + + // To simplify declaration in sanitizer code where we want to avoid + // heap allocations, the constructor and destructor do nothing and + // launchThread and joinThread do the real work. + // They should each be called just once. + bool launchThread(SidelineFunc takeSample, void *Arg, u32 FreqMilliSec); + bool joinThread(); + + // Must be called from the sideline thread itself. 
+ bool adjustTimer(u32 FreqMilliSec); + +private: + static int runSideline(void *Arg); + static void registerSignal(int SigNum); + static void handleSidelineSignal(int SigNum, void *SigInfo, void *Ctx); + + char *Stack; + SidelineFunc sampleFunc; + void *FuncArg; + u32 Freq; + uptr SidelineId; + atomic_uintptr_t SidelineExit; +}; + +} // namespace __esan + +#endif // ESAN_SIDELINE_H Index: lib/esan/esan_sideline_linux.cpp =================================================================== --- /dev/null +++ lib/esan/esan_sideline_linux.cpp @@ -0,0 +1,173 @@ +//===-- esan_sideline_linux.cpp ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of EfficiencySanitizer, a family of performance tuners. +// +// Support for a separate or "sideline" tool thread on Linux. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_LINUX + +#include "esan_sideline.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_linux.h" +#include <errno.h> +#include <sched.h> +#include <sys/prctl.h> +#include <sys/signal.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> + +namespace __esan { + +static const int SigAltStackSize = 4*1024; +static const int SidelineStackSize = 4*1024; + +// FIXME: we'll need some kind of TLS (can we trust that a pthread key will +// work in our non-POSIX thread?) to access our data in our signal handler +// with multiple sideline threads. For now we assume there is only one +// sideline thread and we use a dirty solution of a global var. +static SidelineThread *TheThread; + +// We aren't passing SA_NODEFER so the same signal is blocked while here. 
+void SidelineThread::handleSidelineSignal(int SigNum, void *SigInfo, void *Ctx) { + VPrintf(3, "Sideline signal %d\n", SigNum); + if (SigNum == SIGALRM) { + // See above about needing TLS to avoid this global var. + SidelineThread *Thread = TheThread; + if (atomic_load(&Thread->SidelineExit, memory_order_relaxed) != 0) + return; + Thread->sampleFunc(Thread->FuncArg); + } else + UNREACHABLE("signal not registered"); +} + +void SidelineThread::registerSignal(int SigNum) { + __sanitizer_sigaction SigAct; + internal_memset(&SigAct, 0, sizeof(SigAct)); + SigAct.sigaction = handleSidelineSignal; + // We do not pass SA_NODEFER as we want to block the same signal. + SigAct.sa_flags = SA_ONSTACK | SA_SIGINFO; + int Res = internal_sigaction(SigNum, &SigAct, nullptr); + CHECK(Res == 0); +} + +int SidelineThread::runSideline(void *Arg) { + VPrintf(1, "Sideline thread starting\n"); + SidelineThread *Thread = static_cast<SidelineThread *>(Arg); + + // If the parent dies, we want to exit also. + internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + + // Set up a signal handler on an alternate stack for safety. + InternalScopedBuffer<char> StackMap(SigAltStackSize); + struct sigaltstack SigAltStack; + internal_memset(&SigAltStack, 0, sizeof(SigAltStack)); + SigAltStack.ss_sp = StackMap.data(); + SigAltStack.ss_size = SigAltStackSize; + internal_sigaltstack(&SigAltStack, nullptr); + + // We inherit the signal mask from the app thread. In case + // we weren't created at init time, we ensure the mask is empty. + __sanitizer_sigset_t SigSet; + internal_sigfillset(&SigSet); + int Res = internal_sigprocmask(SIG_UNBLOCK, &SigSet, nullptr); + CHECK(Res == 0); + + registerSignal(SIGALRM); + + if (!Thread->adjustTimer(Thread->Freq)) { + Printf("FATAL: EfficiencySanitizer failed to set up itimer\n"); + Die(); + } + + // We loop, doing nothing but handling itimer signals. 
+ while (atomic_load(&TheThread->SidelineExit, memory_order_relaxed) == 0) + sched_yield(); + + if (!Thread->adjustTimer(0)) + VPrintf(1, "Failed to disable timer\n"); + + VPrintf(1, "Sideline thread exiting\n"); + return 0; +} + +bool SidelineThread::launchThread(SidelineFunc takeSample, void *Arg, + u32 FreqMilliSec) { + // This can only be called once. However, we can't clear a field in + // the constructor and check for that here as the constructor for + // a static instance is called *after* our module_ctor and thus after + // this routine! Thus we rely on the TheThread check below. + CHECK(TheThread == nullptr); // Only one sideline thread is supported. + TheThread = this; + sampleFunc = takeSample; + FuncArg = Arg; + Freq = FreqMilliSec; + atomic_store(&SidelineExit, 0, memory_order_relaxed); + + // We do without a guard page. + Stack = static_cast<char *>(MmapOrDie(SidelineStackSize, "SidelineStack")); + // By omitting CLONE_THREAD, the child is in its own thread group and will not + // receive any of the application's signals. + SidelineId = internal_clone( + runSideline, Stack + SidelineStackSize, + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED, + this, nullptr /* parent_tidptr */, + nullptr /* newtls */, nullptr /* child_tidptr */); + int ErrCode; + if (internal_iserror(SidelineId, &ErrCode)) { + Printf("FATAL: EfficiencySanitizer failed to spawn a thread (code %d).\n", + ErrCode); + Die(); + return false; // Not reached. 
+ } + return true; +} + +bool SidelineThread::joinThread() { + VPrintf(1, "Joining sideline thread\n"); + bool Res = true; + atomic_store(&SidelineExit, 1, memory_order_relaxed); + while (true) { + uptr Status = internal_waitpid(SidelineId, nullptr, __WALL); + int ErrCode; + if (!internal_iserror(Status, &ErrCode)) + break; + if (ErrCode == EINTR) + continue; + VPrintf(1, "Failed to join sideline thread (errno %d)\n", ErrCode); + Res = false; + break; + } + UnmapOrDie(Stack, SidelineStackSize); + return Res; +} + +// Must be called from the sideline thread itself. +bool SidelineThread::adjustTimer(u32 FreqMilliSec) { + CHECK(internal_getpid() == SidelineId); + Freq = FreqMilliSec; + struct itimerval TimerVal; + TimerVal.it_interval.tv_sec = (time_t) Freq / 1000; + TimerVal.it_interval.tv_usec = (time_t) (Freq % 1000) * 1000; + TimerVal.it_value.tv_sec = (time_t) Freq / 1000; + TimerVal.it_value.tv_usec = (time_t) (Freq % 1000) * 1000; + // As we're in a different thread group, we cannot use either + // ITIMER_PROF or ITIMER_VIRTUAL without taking up scheduled + // time ourselves: thus we must use real time. + int Res = setitimer(ITIMER_REAL, &TimerVal, nullptr); + return (Res == 0); +} + +} // namespace __esan + +#endif // SANITIZER_LINUX Index: lib/esan/working_set.cpp =================================================================== --- lib/esan/working_set.cpp +++ lib/esan/working_set.cpp @@ -16,6 +16,7 @@ #include "esan.h" #include "esan_flags.h" #include "esan_shadow.h" +#include "esan_sideline.h" #include "sanitizer_common/sanitizer_procmaps.h" // We shadow every cache line of app memory with one shadow byte. @@ -40,6 +41,12 @@ static const byte ShadowAccessedVal = (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx); +static SidelineThread Thread; +// If we use real-time-based timer samples this won't overflow in any realistic +// scenario, but if we switch to some other unit (such as memory accesses) we +// may want to consider a 64-bit int. 
+static u32 SnapshotNum; + void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size, bool IsWrite) { if (Size == 0) @@ -124,9 +131,22 @@ return WorkingSetSize; } +// This is invoked from a signal handler but in a sideline thread doing nothing +// else so it is a little less fragile than a typical signal handler. +static void takeSample(void *Arg) { + // FIXME: record the size and report at process end. For now this simply + // serves as a test of the sideline thread functionality. + VReport(1, "%s: snapshot #%d: %u\n", SanitizerToolName, SnapshotNum, + computeWorkingSizeAndReset(CurWorkingSetBitIdx)); + ++SnapshotNum; +} + void initializeWorkingSet() { CHECK(getFlags()->cache_line_size == CacheLineSize); registerShadowFault(); + + if (getFlags()->record_snapshots) + Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq); } static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) { @@ -147,6 +167,9 @@ } int finalizeWorkingSet() { + if (getFlags()->record_snapshots) + Thread.joinThread(); + // Get the working set size for the entire execution. u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx); const char *Unit; Index: test/esan/TestCases/workingset-samples.cpp =================================================================== --- /dev/null +++ test/esan/TestCases/workingset-samples.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_esan_wset -O0 %s -o %t 2>&1 +// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s + +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +const int size = 0x1 << 25; // 524288 cache lines + +int main(int argc, char **argv) { + char *buf = (char *)mmap(0, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + // Try to increase the probability that the sideline thread is + // scheduled. Unfortunately we can't do proper synchronization + // without some form of annotation or something. + sched_yield(); + // Do enough work to get at least 2 samples. 
+ for (int i = 0; i < size; ++i) + buf[i] = i; + munmap(buf, size); + // CHECK: {{.*}}EfficiencySanitizer: snapshot {{.*}} + // CHECK-NEXT: {{.*}}EfficiencySanitizer: snapshot {{.*}} + // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines) + return 0; +} Index: test/esan/Unit/circular_buffer.cpp =================================================================== --- test/esan/Unit/circular_buffer.cpp +++ test/esan/Unit/circular_buffer.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx_unit -O0 %s -o %t 2>&1 -// RUN: %run %t 2>&1 | FileCheck %s +// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s #include "esan/esan_circular_buffer.h" #include "sanitizer_common/sanitizer_placement_new.h" Index: test/esan/lit.cfg =================================================================== --- test/esan/lit.cfg +++ test/esan/lit.cfg @@ -17,6 +17,7 @@ esan_incdir = config.test_source_root + "/../../lib" unit_cxxflags = (["-I%s" % esan_incdir, "-std=c++11", # We need to link with the esan runtime. + # Tests should pass %env_esan_opts="record_snapshots=0". "-fsanitize=efficiency-working-set"] + base_cxxflags) def build_invocation(compile_flags):