diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -14,7 +14,7 @@ namespace llvm { struct TimeTraceProfiler; -extern TimeTraceProfiler *TimeTraceProfilerInstance; +extern LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance; /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance @@ -25,6 +25,9 @@ /// Cleanup the time trace profiler, if it was initialized. void timeTraceProfilerCleanup(); +/// Finish a time trace profiler running on a worker thread. +void timeTraceProfilerFinishThread(); + /// Is the time trace profiler enabled, i.e. initialized? inline bool timeTraceProfilerEnabled() { return TimeTraceProfilerInstance != nullptr; diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -15,16 +15,25 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" +#include #include #include +#include #include #include using namespace std::chrono; +namespace { +std::mutex Mu; +std::vector + ThreadTimeTraceProfilerInstances; // guarded by Mu +} // namespace + namespace llvm { -TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; +LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; typedef duration DurationType; typedef time_point TimePointType; @@ -61,7 +70,7 @@ struct TimeTraceProfiler { TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") : StartTime(steady_clock::now()), ProcName(ProcName), - TimeTraceGranularity(TimeTraceGranularity) {} + Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {} void begin(std::string Name, llvm::function_ref Detail) { Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), @@ -103,22 +112,31 @@ Stack.pop_back(); } + // Write events from this TimeTraceProfilerInstance and + // ThreadTimeTraceProfilerInstances. void Write(raw_pwrite_stream &OS) { + // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. + std::lock_guard Lock(Mu); assert(Stack.empty() && "All profiler sections should be ended when calling Write"); + assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(), + ThreadTimeTraceProfilerInstances.end(), + [](const auto &TTP) { return TTP->Stack.empty(); }) && + "All profiler sections should be ended when calling Write"); + json::OStream J(OS); J.objectBegin(); J.attributeBegin("traceEvents"); J.arrayBegin(); // Emit all events for the main flame graph. - for (const auto &E : Entries) { + auto writeEvent = [&](const auto &E, uint64_t Tid) { auto StartUs = E.getFlameGraphStartUs(StartTime); auto DurUs = E.getFlameGraphDurUs(); J.object([&]{ J.attribute("pid", 1); - J.attribute("tid", 0); + J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); J.attribute("ts", StartUs); J.attribute("dur", DurUs); @@ -127,39 +145,73 @@ J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); } }); + }; + for (const auto &E : Entries) { + writeEvent(E, this->Tid); + } + for (const auto &TTP : ThreadTimeTraceProfilerInstances) { + for (const auto &E : TTP->Entries) { + writeEvent(E, TTP->Tid); + } } // Emit totals by section name as additional "thread" events, sorted from // longest one. - int Tid = 1; + // Find highest used thread id. + uint64_t MaxTid = this->Tid; + for (const auto &TTP : ThreadTimeTraceProfilerInstances) { + MaxTid = std::max(MaxTid, TTP->Tid); + } + + // Combine all CountAndTotalPerName from threads into one. + StringMap AllCountAndTotalPerName; + auto combineStat = [&](const auto &Stat) { + std::string Key = Stat.getKey(); + auto Value = Stat.getValue(); + auto &CountAndTotal = AllCountAndTotalPerName[Key]; + CountAndTotal.first += Value.first; + CountAndTotal.second += Value.second; + }; + for (const auto &Stat : CountAndTotalPerName) { + combineStat(Stat); + } + for (const auto &TTP : ThreadTimeTraceProfilerInstances) { + for (const auto &Stat : TTP->CountAndTotalPerName) { + combineStat(Stat); + } + } + std::vector SortedTotals; - SortedTotals.reserve(CountAndTotalPerName.size()); - for (const auto &E : CountAndTotalPerName) - SortedTotals.emplace_back(E.getKey(), E.getValue()); + SortedTotals.reserve(AllCountAndTotalPerName.size()); + for (const auto &Total : AllCountAndTotalPerName) + SortedTotals.emplace_back(Total.getKey(), Total.getValue()); llvm::sort(SortedTotals.begin(), SortedTotals.end(), [](const NameAndCountAndDurationType &A, const NameAndCountAndDurationType &B) { return A.second.second > B.second.second; }); - for (const auto &E : SortedTotals) { - auto DurUs = duration_cast(E.second.second).count(); - auto Count = CountAndTotalPerName[E.first].first; + + // Report totals on separate threads of tracing file. + uint64_t TotalTid = MaxTid + 1; + for (const auto &Total : SortedTotals) { + auto DurUs = duration_cast(Total.second.second).count(); + auto Count = AllCountAndTotalPerName[Total.first].first; J.object([&]{ J.attribute("pid", 1); - J.attribute("tid", Tid); + J.attribute("tid", int64_t(TotalTid)); J.attribute("ph", "X"); J.attribute("ts", 0); J.attribute("dur", DurUs); - J.attribute("name", "Total " + E.first); + J.attribute("name", "Total " + Total.first); J.attributeObject("args", [&] { J.attribute("count", int64_t(Count)); J.attribute("avg ms", int64_t(DurUs / Count / 1000)); }); }); - ++Tid; + ++TotalTid; } // Emit metadata event with process name. @@ -183,6 +235,7 @@ StringMap CountAndTotalPerName; const TimePointType StartTime; const std::string ProcName; + const uint64_t Tid; // Minimum time granularity (in microseconds) const unsigned TimeTraceGranularity; @@ -196,8 +249,21 @@ TimeTraceGranularity, llvm::sys::path::filename(ProcName)); } +// Removes all TimeTraceProfilerInstances. +// Called from main thread. void timeTraceProfilerCleanup() { delete TimeTraceProfilerInstance; + std::lock_guard Lock(Mu); + for (auto TTP : ThreadTimeTraceProfilerInstances) + delete TTP; + ThreadTimeTraceProfilerInstances.clear(); +} + +// Finish TimeTraceProfilerInstance on a worker thread. +// This doesn't remove the instance, just moves the pointer to global vector. +void timeTraceProfilerFinishThread() { + std::lock_guard Lock(Mu); + ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance); TimeTraceProfilerInstance = nullptr; }