Index: llvm/lib/Support/TimeProfiler.cpp =================================================================== --- llvm/lib/Support/TimeProfiler.cpp +++ llvm/lib/Support/TimeProfiler.cpp @@ -27,39 +27,55 @@ TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; typedef duration DurationType; +typedef time_point TimePointType; typedef std::pair CountAndDurationType; typedef std::pair NameAndCountAndDurationType; struct Entry { - time_point Start; - DurationType Duration; + TimePointType Start; + TimePointType End; std::string Name; std::string Detail; - Entry(time_point &&S, DurationType &&D, std::string &&N, - std::string &&Dt) - : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)), + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Detail(std::move(Dt)){}; + + // Calculate timings for FlameGraph. Cast time points to microsecond precision + // rather than casting duration. This avoid truncation issues causing inner + // scopes overruning outer scopes. + long long getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast(Start) - + time_point_cast(StartTime)) + .count(); + } + + long long getFlameGraphDurUs() const { + return (time_point_cast(End) - + time_point_cast(Start)) + .count(); + } }; struct TimeTraceProfiler { - TimeTraceProfiler() { - StartTime = steady_clock::now(); - } + TimeTraceProfiler() { StartTime = steady_clock::now(); } void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name), + Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), Detail()); } void end() { assert(!Stack.empty() && "Must call begin() first"); auto &E = Stack.back(); - E.Duration = steady_clock::now() - E.Start; + E.End = steady_clock::now(); + + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; // Only include sections longer or equal to TimeTraceGranularity msec. - if (duration_cast(E.Duration).count() >= TimeTraceGranularity) + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -72,15 +88,68 @@ }) == Stack.rend()) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; - CountAndTotal.second += E.Duration; + CountAndTotal.second += Duration; } Stack.pop_back(); } + // Check that the TimeProfile conforms to Flame Graph assumptions. + void checkFlameGraph() const { + // Sort primarily by start time and secondarily by reverse end time. + // Do this on the steady_clock precision start and end times (easier and + // should not be any worse or better). + auto SortedEntries = SmallVector(Entries); + llvm::sort(SortedEntries, [](const Entry &A, const Entry &B) { + if (A.Start != B.Start) { + return A.Start < B.Start; + } + return A.End > B.End; + }); + + // Stack of when current scopes end in microseconds. + SmallVector EndStack; + // List of end times (in microseconds) in order they should exit according + // to stack. + std::vector EndList; + EndList.reserve(Entries.size()); + + // Reconstruct stack by working through events. + for (const auto &E : SortedEntries) { + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); + auto EndUs = StartUs + DurUs; + + // Remove closed scopes from EndStack and add to EndList in order exited. + while (!EndStack.empty() && (StartUs >= EndStack.back())) { + EndList.push_back(EndStack.back()); + EndStack.pop_back(); + } + // Add end time of this entry to stack. + EndStack.push_back(EndUs); + } + + // Empty stack of unclosed scopes. + while (!EndStack.empty()) { + EndList.push_back(EndStack.back()); + EndStack.pop_back(); + } + // Check EndList monotonically increases. + long long LastEnd = 0; + for (const auto &End : EndList) { + assert((End >= LastEnd) && + "TimeProfiler scope ended earlier than a deeper scope"); + LastEnd = End; + } + } + void Write(raw_pwrite_stream &OS) { assert(Stack.empty() && "All profiler sections should be ended when calling Write"); +#ifndef NDEBUG + checkFlameGraph(); +#endif + json::OStream J(OS); J.objectBegin(); J.attributeBegin("traceEvents"); @@ -88,8 +157,8 @@ // Emit all events for the main flame graph. for (const auto &E : Entries) { - auto StartUs = duration_cast(E.Start - StartTime).count(); - auto DurUs = duration_cast(E.Duration).count(); + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); J.object([&]{ J.attribute("pid", 1); @@ -154,7 +223,7 @@ SmallVector Stack; SmallVector Entries; StringMap CountAndTotalPerName; - time_point StartTime; + TimePointType StartTime; // Minimum time granularity (in microseconds) unsigned TimeTraceGranularity;