diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -5,12 +5,80 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This provides lightweight and dependency-free machinery to trace execution +// time around arbitrary code. Two API flavors are available. +// +// The primary API uses a RAII object to trigger tracing: +// +// \code +// { +// TimeTraceScope scope("my_event_name"); +// ...my code... +// } +// \endcode +// +// If the code to be profiled does not have a natural lexical scope then +// it is also possible to start and end events with respect to an implicit +// per-thread stack of profiling entries: +// +// \code +// timeTraceProfilerBegin("my_event_name"); +// ...my code... +// timeTraceProfilerEnd(); // must be called on all control flow paths +// \endcode +// +// Time profiling entries can be given an arbitrary name and, optionally, +// an arbitrary 'detail' string. The resulting trace will include 'Total' +// entries summing the time spent for each name. Thus, it's best to choose +// names to be fairly generic, and rely on the detail field to capture +// everything else of interest. +// +// To avoid lifetime issues name and detail strings are copied into the event +// entries at their time of creation. Care should be taken to make string +// construction cheap to prevent 'Heisenperf' effects. In particular, the +// 'detail' argument may be a string-returning closure: +// +// \code +// int n; +// { +// TimeTraceScope scope("my_event_name", +// [n]() { return (Twine("x=") + Twine(n)).str(); }); +// ...my code... +// } +// \endcode +// The closure will not be called if tracing is disabled. Otherwise, the +// resulting string will be directly moved into the entry. +// +// The main process should begin with a timeTraceProfilerInitialize, and +// finish with timeTraceProfileWrite and timeTraceProfilerCleanup calls. +// Each new thread should begin with a timeTraceProfilerInitialize, and +// finish with a timeTraceProfilerFinishThread call. +// +// Timestamps come from std::chrono::stable_clock. Note that threads need +// not see the same time from that clock, and the resolution may not be +// the best available. +// +// Currently, there are a number of compatible viewers: +// - chrome://tracing is the original chromium trace viewer. +// - http://ui.perfetto.dev is the replacement for the above, under active +// development by Google as part of the 'Perfetto' project. +// - https://www.speedscope.app/ has also been reported as an option. +// +// Future work: +// - Support akin to LLVM_DEBUG for runtime enable/disable of named tracing +// families for non-debug builds which wish to support optional tracing. +// - Evaluate the detail closures at profile write time to avoid +// stringification costs interfering with tracing. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_TIMEPROFILER_H #define LLVM_SUPPORT_TIMEPROFILER_H -#include "llvm/Support/Error.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" namespace llvm { @@ -39,7 +107,7 @@ /// Write profiling data to output stream. /// Data produced is JSON, in Chrome "Trace Event" format, see /// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview -void timeTraceProfilerWrite(raw_pwrite_stream &OS); +void timeTraceProfilerWrite(raw_ostream &OS); /// Write profiling data to a file. /// The function will write to \p PreferredFileName if provided, if not diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -24,11 +24,18 @@ #include #include -using namespace std::chrono; using namespace llvm; namespace { +using std::chrono::duration; +using std::chrono::duration_cast; +using std::chrono::microseconds; +using std::chrono::steady_clock; +using std::chrono::system_clock; +using std::chrono::time_point; +using std::chrono::time_point_cast; + struct TimeTraceProfilerInstances { std::mutex Lock; std::vector List; @@ -48,57 +55,62 @@ return TimeTraceProfilerInstance; } -typedef duration DurationType; -typedef time_point TimePointType; -typedef std::pair CountAndDurationType; -typedef std::pair - NameAndCountAndDurationType; - namespace { -struct Entry { + +using ClockType = steady_clock; +using TimePointType = time_point; +using DurationType = duration; +using CountAndDurationType = std::pair; +using NameAndCountAndDurationType = + std::pair; + +/// Represents an open or completed time section entry to be captured. +struct TimeTraceProfilerEntry { const TimePointType Start; TimePointType End; const std::string Name; const std::string Detail; - Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, + std::string &&Dt) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Detail(std::move(Dt)) {} // Calculate timings for FlameGraph. Cast time points to microsecond precision - // rather than casting duration. This avoid truncation issues causing inner + // rather than casting duration. This avoids truncation issues causing inner // scopes overruning outer scopes. - steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { + ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { return (time_point_cast(Start) - time_point_cast(StartTime)) .count(); } - steady_clock::rep getFlameGraphDurUs() const { + ClockType::rep getFlameGraphDurUs() const { return (time_point_cast(End) - time_point_cast(Start)) .count(); } }; -} // namespace + +} // anonymous namespace struct llvm::TimeTraceProfiler { TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") - : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()), + : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), ProcName(ProcName), Pid(sys::Process::getProcessId()), Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { llvm::get_thread_name(ThreadName); } void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), + Stack.emplace_back(ClockType::now(), TimePointType(), std::move(Name), Detail()); } void end() { assert(!Stack.empty() && "Must call begin() first"); - Entry &E = Stack.back(); - E.End = steady_clock::now(); + TimeTraceProfilerEntry &E = Stack.back(); + E.End = ClockType::now(); // Check that end times monotonically increase. assert((Entries.empty() || @@ -120,7 +132,9 @@ // happens to be the ones that don't have any currently open entries above // itself. if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), - [&](const Entry &Val) { return Val.Name == E.Name; })) { + [&](const TimeTraceProfilerEntry &Val) { + return Val.Name == E.Name; + })) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; CountAndTotal.second += Duration; @@ -131,7 +145,7 @@ // Write events from this TimeTraceProfilerInstance and // ThreadTimeTraceProfilerInstances. - void write(raw_pwrite_stream &OS) { + void write(raw_ostream &OS) { // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. auto &Instances = getTimeTraceProfilerInstances(); std::lock_guard Lock(Instances.Lock); @@ -163,10 +177,10 @@ } }); }; - for (const Entry &E : Entries) + for (const TimeTraceProfilerEntry &E : Entries) writeEvent(E, this->Tid); for (const TimeTraceProfiler *TTP : Instances.List) - for (const Entry &E : TTP->Entries) + for (const TimeTraceProfilerEntry &E : TTP->Entries) writeEvent(E, TTP->Tid); // Emit totals by section name as additional "thread" events, sorted from @@ -255,10 +269,12 @@ J.objectEnd(); } - SmallVector Stack; - SmallVector Entries; + SmallVector Stack; + SmallVector Entries; StringMap CountAndTotalPerName; + // System clock time when the session was begun. const time_point BeginningOfTime; + // Profiling clock time when the session was begun. const TimePointType StartTime; const std::string ProcName; const sys::Process::Pid Pid; @@ -299,7 +315,7 @@ TimeTraceProfilerInstance = nullptr; } -void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { +void llvm::timeTraceProfilerWrite(raw_ostream &OS) { assert(TimeTraceProfilerInstance != nullptr && "Profiler object can't be null"); TimeTraceProfilerInstance->write(OS); diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -86,6 +86,7 @@ ThreadPool.cpp Threading.cpp TimerTest.cpp + TimeProfilerTest.cpp ToolOutputFileTest.cpp TypeNameTest.cpp TypeTraitsTest.cpp diff --git a/llvm/unittests/Support/TimeProfilerTest.cpp b/llvm/unittests/Support/TimeProfilerTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/Support/TimeProfilerTest.cpp @@ -0,0 +1,64 @@ +//===- unittests/TimeProfilerTest.cpp - TimeProfiler tests ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// These are bare-minimum 'smoke' tests of the time profiler. Not tested: +// - multi-threading +// - 'Total' entries +// - elision of short or ill-formed entries +// - detail callback +// - no calls to now() if profiling is disabled +// - suppression of contributions to total entries for nested entries +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TimeProfiler.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +void setupProfiler() { + timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "test"); +} + +std::string teardownProfiler() { + std::string json; + raw_string_ostream os(json); + timeTraceProfilerWrite(os); + timeTraceProfilerCleanup(); + return json; +} + +TEST(TimeProfiler, Scope_Smoke) { + setupProfiler(); + + { TimeTraceScope scope("event", "detail"); } + + std::string json = teardownProfiler(); + ASSERT_TRUE(json.find(R"("name":"event")") != std::string::npos); + ASSERT_TRUE(json.find(R"("detail":"detail")") != std::string::npos); +} + +TEST(TimeProfiler, Begin_End_Smoke) { + setupProfiler(); + + timeTraceProfilerBegin("event", "detail"); + timeTraceProfilerEnd(); + + std::string json = teardownProfiler(); + ASSERT_TRUE(json.find(R"("name":"event")") != std::string::npos); + ASSERT_TRUE(json.find(R"("detail":"detail")") != std::string::npos); +} + +TEST(TimeProfiler, Begin_End_Disabled) { + // Nothing should be observable here. The test is really just making sure + // we've not got a stray nullptr deref. + timeTraceProfilerBegin("event", "detail"); + timeTraceProfilerEnd(); +} + +} // namespace