Index: llvm/lib/Support/TimeProfiler.cpp =================================================================== --- llvm/lib/Support/TimeProfiler.cpp +++ llvm/lib/Support/TimeProfiler.cpp @@ -27,39 +27,40 @@ TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; typedef duration DurationType; +typedef time_point TimePointType; typedef std::pair CountAndDurationType; typedef std::pair NameAndCountAndDurationType; struct Entry { - time_point Start; - DurationType Duration; + TimePointType Start; + TimePointType End; std::string Name; std::string Detail; - Entry(time_point &&S, DurationType &&D, std::string &&N, - std::string &&Dt) - : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)), + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Detail(std::move(Dt)){}; }; struct TimeTraceProfiler { - TimeTraceProfiler() { - StartTime = steady_clock::now(); - } + TimeTraceProfiler() { StartTime = steady_clock::now(); } void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name), + Stack.emplace_back(steady_clock::now(), TimePointType{}, std::move(Name), Detail()); } void end() { assert(!Stack.empty() && "Must call begin() first"); auto &E = Stack.back(); - E.Duration = steady_clock::now() - E.Start; + E.End = steady_clock::now(); + + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; - // Only include sections longer than TimeTraceGranularity msec. - if (duration_cast(E.Duration).count() > TimeTraceGranularity) + // Only include sections longer than TimeTraceGranularity usec. + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -72,7 +73,7 @@ }) == Stack.rend()) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; - CountAndTotal.second += E.Duration; + CountAndTotal.second += Duration; } Stack.pop_back(); @@ -88,8 +89,15 @@ // Emit all events for the main flame graph. for (const auto &E : Entries) { - auto StartUs = duration_cast(E.Start - StartTime).count(); - auto DurUs = duration_cast(E.Duration).count(); + // Cast time points to us precision rather than casting duration. + // This avoid truncation issues causing inner scopes overruning outer + // scopes + auto StartUs = (time_point_cast(E.Start) - + time_point_cast(StartTime)) + .count(); + auto DurUs = (time_point_cast(E.End) - + time_point_cast(E.Start)) + .count(); J.object([&]{ J.attribute("pid", 1); @@ -154,7 +162,7 @@ SmallVector Stack; SmallVector Entries; StringMap CountAndTotalPerName; - time_point StartTime; + TimePointType StartTime; // Minimum time granularity (in microseconds) unsigned TimeTraceGranularity; Index: llvm/unittests/Support/CMakeLists.txt =================================================================== --- llvm/unittests/Support/CMakeLists.txt +++ llvm/unittests/Support/CMakeLists.txt @@ -68,6 +68,7 @@ ThreadLocalTest.cpp ThreadPool.cpp Threading.cpp + TimeProfilerTest.cpp TimerTest.cpp TypeNameTest.cpp TypeTraitsTest.cpp Index: llvm/unittests/Support/TimeProfilerTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/Support/TimeProfilerTest.cpp @@ -0,0 +1,90 @@ +//===- unittests/TimeProfilerTest.cpp - Time Profiler tests ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TimeProfiler.h" +#include "gtest/gtest.h" + +using namespace llvm; + +#ifdef NDEBUG +// Run enough iterations to reliably see this issue in Release. +#define ITERATIONS 100000 +#else +// Limit iterations in Debug as this is slow and have not seen this issue in +// Debug configuration (possibly because the gap between inner and outer is too +// long to overcome). +#define ITERATIONS 10 +#endif + +namespace { + +TEST(TimeProfiler, FlameGraph) { + // Test that using time scopes conform to flame graph requirements. + for (int i = 0; i != ITERATIONS; i++) { + // Initialise with granularity 0, to get all scopes. + timeTraceProfilerInitialize(0); + + // Time inner and outer scopes. + { + TimeTraceScope OuterScope("OuterScope", StringRef("")); + { + TimeTraceScope OuterScope("InnerScope", StringRef("")); + } + } + + // Write out time trace to string. + SmallString<1024> Buffer; + raw_svector_ostream OS(Buffer); + timeTraceProfilerWrite(OS); + + // Parse time trace from string to check constraints. + auto O = json::parse(Buffer); + if (!O) { + handleAllErrors(O.takeError(), [Buffer](const llvm::ErrorInfoBase &E) { + FAIL() << "Failed to parse JSON >>> " << Buffer + << " <<<: " << E.message(); + }); + } + json::Array *A = O->getAsObject()->getArray("traceEvents"); + // Trace events should have 5 events: + // InnerScope, OuterScope, Total InnerScope, Total OuterScope, process name + ASSERT_TRUE(A->size() == 5); + + // Get times for inner scope. + auto Inner = (*A)[0].getAsObject(); + ASSERT_TRUE(Inner); + EXPECT_EQ(Inner->getString("name"), llvm::StringRef("InnerScope")); + auto InnerStart = Inner->getInteger("ts").getValue(); + auto InnerDur = Inner->getInteger("dur").getValue(); + auto InnerEnd = InnerStart + InnerDur; + + // Get times for outer scope. + auto Outer = (*A)[1].getAsObject(); + ASSERT_TRUE(Outer); + EXPECT_EQ(Outer->getString("name"), llvm::StringRef("OuterScope")); + auto OuterStart = Outer->getInteger("ts").getValue(); + auto OuterDur = Outer->getInteger("dur").getValue(); + auto OuterEnd = OuterStart + OuterDur; + + // Assert some things that must be true for a flame graph. + // Inner must not be longer than outer. + ASSERT_TRUE(InnerDur <= OuterDur); + // Inner must start at same time or after outer. + ASSERT_TRUE(InnerStart >= OuterStart); + // Inner must not finish after outer. + ASSERT_TRUE(InnerEnd <= OuterEnd); + + // Clean up. + timeTraceProfilerCleanup(); + } +} + +} // namespace