Index: llvm/include/llvm/Support/TimeProfiler.h =================================================================== --- llvm/include/llvm/Support/TimeProfiler.h +++ llvm/include/llvm/Support/TimeProfiler.h @@ -77,13 +77,60 @@ #define LLVM_SUPPORT_TIMEPROFILER_H #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" +#include +#include + +typedef std::chrono::duration + DurationType; +typedef std::chrono::time_point TimePointType; +typedef std::pair CountAndDurationType; +typedef std::pair + NameAndCountAndDurationType; namespace llvm { class raw_pwrite_stream; -struct TimeTraceProfiler; +struct Entry { + const TimePointType Start; + TimePointType End; + const std::string Name; + const std::string Detail; + + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, + std::string &&Dt); + + // Calculate timings for FlameGraph. + std::chrono::steady_clock::rep + getFlameGraphStartUs(TimePointType StartTime) const; + std::chrono::steady_clock::rep getFlameGraphDurUs() const; +}; +struct TimeTraceProfiler { + TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = ""); + void begin(std::string Name, llvm::function_ref Detail); + void end(); + + // Write events from this TimeTraceProfilerInstance and + // ThreadTimeTraceProfilerInstances. + void write(raw_pwrite_stream &OS); + + SmallVector Stack; + SmallVector Entries; + StringMap CountAndTotalPerName; + const std::chrono::time_point BeginningOfTime; + const TimePointType StartTime; + const std::string ProcName; + const sys::Process::Pid Pid; + SmallString<0> ThreadName; + const uint64_t Tid; + + // Minimum time granularity (in microseconds) + const unsigned TimeTraceGranularity; +}; TimeTraceProfiler *getTimeTraceProfilerInstance(); /// Initialize the time trace profiler. @@ -157,6 +204,13 @@ } }; +struct TimeTraceProfilerInstances { + std::mutex Lock; + std::vector List; +}; +static TimeTraceProfilerInstances Instances; +TimeTraceProfilerInstances &getTimeTraceProfilerInstances(); + } // end namespace llvm #endif Index: llvm/lib/Support/TimeProfiler.cpp =================================================================== --- llvm/lib/Support/TimeProfiler.cpp +++ llvm/lib/Support/TimeProfiler.cpp @@ -15,275 +15,226 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Threading.h" #include #include -#include #include #include #include using namespace llvm; -namespace { +// Per Thread instance +static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; + +TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { + return TimeTraceProfilerInstance; +} + +namespace llvm { -using std::chrono::duration; using std::chrono::duration_cast; using std::chrono::microseconds; using std::chrono::steady_clock; using std::chrono::system_clock; -using std::chrono::time_point; using std::chrono::time_point_cast; -struct TimeTraceProfilerInstances { - std::mutex Lock; - std::vector List; -}; - -TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { - static TimeTraceProfilerInstances Instances; - return Instances; +Entry::Entry(TimePointType &&S, TimePointType &&E, std::string &&N, + std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), + Detail(std::move(Dt)) {} + +// Calculate timings for FlameGraph. Cast time points to microsecond precision +// rather than casting duration. This avoid truncation issues causing inner +// scopes overruning outer scopes. +std::chrono::steady_clock::rep Entry::getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast(Start) - + time_point_cast(StartTime)) + .count(); } -} // anonymous namespace - -// Per Thread instance -static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; - -TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { - return TimeTraceProfilerInstance; +std::chrono::steady_clock::rep Entry::getFlameGraphDurUs() const { + return (time_point_cast(End) - + time_point_cast(Start)) + .count(); } -namespace { - -using ClockType = steady_clock; -using TimePointType = time_point; -using DurationType = duration; -using CountAndDurationType = std::pair; -using NameAndCountAndDurationType = - std::pair; - -/// Represents an open or completed time section entry to be captured. -struct TimeTraceProfilerEntry { - const TimePointType Start; - TimePointType End; - const std::string Name; - const std::string Detail; - - TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, - std::string &&Dt) - : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), - Detail(std::move(Dt)) {} - - // Calculate timings for FlameGraph. Cast time points to microsecond precision - // rather than casting duration. This avoids truncation issues causing inner - // scopes overruning outer scopes. - ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { - return (time_point_cast(Start) - - time_point_cast(StartTime)) - .count(); - } - - ClockType::rep getFlameGraphDurUs() const { - return (time_point_cast(End) - - time_point_cast(Start)) - .count(); - } -}; +TimeTraceProfiler::TimeTraceProfiler(unsigned TimeTraceGranularity, + StringRef ProcName) + : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()), + ProcName(ProcName), Pid(sys::Process::getProcessId()), + Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { + llvm::get_thread_name(ThreadName); +} -} // anonymous namespace +void TimeTraceProfiler::begin(std::string Name, + llvm::function_ref Detail) { + Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), + Detail()); +} -struct llvm::TimeTraceProfiler { - TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") - : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), - ProcName(ProcName), Pid(sys::Process::getProcessId()), - Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { - llvm::get_thread_name(ThreadName); +void TimeTraceProfiler::end() { + assert(!Stack.empty() && "Must call begin() first"); + Entry &E = Stack.back(); + E.End = steady_clock::now(); + + // Check that end times monotonically increase. + assert((Entries.empty() || + (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + Entries.back().getFlameGraphStartUs(StartTime) + + Entries.back().getFlameGraphDurUs())) && + "TimeProfiler scope ended earlier than previous scope"); + + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; + + // Only include sections longer or equal to TimeTraceGranularity msec. + if (duration_cast(Duration).count() >= TimeTraceGranularity) + Entries.emplace_back(E); + + // Track total time taken by each "name", but only the topmost levels of + // them; e.g. if there's a template instantiation that instantiates other + // templates from within, we only want to add the topmost one. "topmost" + // happens to be the ones that don't have any currently open entries above + // itself. + if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), + [&](const Entry &Val) { return Val.Name == E.Name; })) { + auto &CountAndTotal = CountAndTotalPerName[E.Name]; + CountAndTotal.first++; + CountAndTotal.second += Duration; } - void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(ClockType::now(), TimePointType(), std::move(Name), - Detail()); - } + Stack.pop_back(); +} - void end() { - assert(!Stack.empty() && "Must call begin() first"); - TimeTraceProfilerEntry &E = Stack.back(); - E.End = ClockType::now(); - - // Check that end times monotonically increase. - assert((Entries.empty() || - (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= - Entries.back().getFlameGraphStartUs(StartTime) + - Entries.back().getFlameGraphDurUs())) && - "TimeProfiler scope ended earlier than previous scope"); - - // Calculate duration at full precision for overall counts. - DurationType Duration = E.End - E.Start; - - // Only include sections longer or equal to TimeTraceGranularity msec. - if (duration_cast(Duration).count() >= TimeTraceGranularity) - Entries.emplace_back(E); - - // Track total time taken by each "name", but only the topmost levels of - // them; e.g. if there's a template instantiation that instantiates other - // templates from within, we only want to add the topmost one. "topmost" - // happens to be the ones that don't have any currently open entries above - // itself. - if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), - [&](const TimeTraceProfilerEntry &Val) { - return Val.Name == E.Name; - })) { - auto &CountAndTotal = CountAndTotalPerName[E.Name]; - CountAndTotal.first++; - CountAndTotal.second += Duration; - } - - Stack.pop_back(); - } +// Write events from this TimeTraceProfilerInstance and +// ThreadTimeTraceProfilerInstances. +void TimeTraceProfiler::write(raw_pwrite_stream &OS) { + // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. + auto &Instances = getTimeTraceProfilerInstances(); + std::lock_guard Lock(Instances.Lock); + assert(Stack.empty() && + "All profiler sections should be ended when calling write"); + assert(llvm::all_of(Instances.List, + [](const auto &TTP) { return TTP->Stack.empty(); }) && + "All profiler sections should be ended when calling write"); + + json::OStream J(OS); + J.objectBegin(); + J.attributeBegin("traceEvents"); + J.arrayBegin(); + + // Emit all events for the main flame graph. + auto writeEvent = [&](const auto &E, uint64_t Tid) { + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); + + J.object([&] { + J.attribute("pid", Pid); + J.attribute("tid", int64_t(Tid)); + J.attribute("ph", "X"); + J.attribute("ts", StartUs); + J.attribute("dur", DurUs); + J.attribute("name", E.Name); + if (!E.Detail.empty()) { + J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); + } + }); + }; + for (const Entry &E : Entries) + writeEvent(E, this->Tid); + for (const TimeTraceProfiler *TTP : Instances.List) + for (const Entry &E : TTP->Entries) + writeEvent(E, TTP->Tid); + + // Emit totals by section name as additional "thread" events, sorted from + // longest one. + // Find highest used thread id. + uint64_t MaxTid = this->Tid; + for (const TimeTraceProfiler *TTP : Instances.List) + MaxTid = std::max(MaxTid, TTP->Tid); + + // Combine all CountAndTotalPerName from threads into one. + StringMap AllCountAndTotalPerName; + auto combineStat = [&](const auto &Stat) { + StringRef Key = Stat.getKey(); + auto Value = Stat.getValue(); + auto &CountAndTotal = AllCountAndTotalPerName[Key]; + CountAndTotal.first += Value.first; + CountAndTotal.second += Value.second; + }; + for (const auto &Stat : CountAndTotalPerName) + combineStat(Stat); + for (const TimeTraceProfiler *TTP : Instances.List) + for (const auto &Stat : TTP->CountAndTotalPerName) + combineStat(Stat); - // Write events from this TimeTraceProfilerInstance and - // ThreadTimeTraceProfilerInstances. - void write(raw_pwrite_stream &OS) { - // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. - auto &Instances = getTimeTraceProfilerInstances(); - std::lock_guard Lock(Instances.Lock); - assert(Stack.empty() && - "All profiler sections should be ended when calling write"); - assert(llvm::all_of(Instances.List, - [](const auto &TTP) { return TTP->Stack.empty(); }) && - "All profiler sections should be ended when calling write"); - - json::OStream J(OS); - J.objectBegin(); - J.attributeBegin("traceEvents"); - J.arrayBegin(); - - // Emit all events for the main flame graph. - auto writeEvent = [&](const auto &E, uint64_t Tid) { - auto StartUs = E.getFlameGraphStartUs(StartTime); - auto DurUs = E.getFlameGraphDurUs(); - - J.object([&] { - J.attribute("pid", Pid); - J.attribute("tid", int64_t(Tid)); - J.attribute("ph", "X"); - J.attribute("ts", StartUs); - J.attribute("dur", DurUs); - J.attribute("name", E.Name); - if (!E.Detail.empty()) { - J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); - } + std::vector SortedTotals; + SortedTotals.reserve(AllCountAndTotalPerName.size()); + for (const auto &Total : AllCountAndTotalPerName) + SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); + + llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, + const NameAndCountAndDurationType &B) { + return A.second.second > B.second.second; + }); + + // Report totals on separate threads of tracing file. + uint64_t TotalTid = MaxTid + 1; + for (const NameAndCountAndDurationType &Total : SortedTotals) { + auto DurUs = duration_cast(Total.second.second).count(); + auto Count = AllCountAndTotalPerName[Total.first].first; + + J.object([&] { + J.attribute("pid", Pid); + J.attribute("tid", int64_t(TotalTid)); + J.attribute("ph", "X"); + J.attribute("ts", 0); + J.attribute("dur", DurUs); + J.attribute("name", "Total " + Total.first); + J.attributeObject("args", [&] { + J.attribute("count", int64_t(Count)); + J.attribute("avg ms", int64_t(DurUs / Count / 1000)); }); - }; - for (const TimeTraceProfilerEntry &E : Entries) - writeEvent(E, this->Tid); - for (const TimeTraceProfiler *TTP : Instances.List) - for (const TimeTraceProfilerEntry &E : TTP->Entries) - writeEvent(E, TTP->Tid); - - // Emit totals by section name as additional "thread" events, sorted from - // longest one. - // Find highest used thread id. - uint64_t MaxTid = this->Tid; - for (const TimeTraceProfiler *TTP : Instances.List) - MaxTid = std::max(MaxTid, TTP->Tid); - - // Combine all CountAndTotalPerName from threads into one. - StringMap AllCountAndTotalPerName; - auto combineStat = [&](const auto &Stat) { - StringRef Key = Stat.getKey(); - auto Value = Stat.getValue(); - auto &CountAndTotal = AllCountAndTotalPerName[Key]; - CountAndTotal.first += Value.first; - CountAndTotal.second += Value.second; - }; - for (const auto &Stat : CountAndTotalPerName) - combineStat(Stat); - for (const TimeTraceProfiler *TTP : Instances.List) - for (const auto &Stat : TTP->CountAndTotalPerName) - combineStat(Stat); - - std::vector SortedTotals; - SortedTotals.reserve(AllCountAndTotalPerName.size()); - for (const auto &Total : AllCountAndTotalPerName) - SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); - - llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, - const NameAndCountAndDurationType &B) { - return A.second.second > B.second.second; }); - // Report totals on separate threads of tracing file. - uint64_t TotalTid = MaxTid + 1; - for (const NameAndCountAndDurationType &Total : SortedTotals) { - auto DurUs = duration_cast(Total.second.second).count(); - auto Count = AllCountAndTotalPerName[Total.first].first; - - J.object([&] { - J.attribute("pid", Pid); - J.attribute("tid", int64_t(TotalTid)); - J.attribute("ph", "X"); - J.attribute("ts", 0); - J.attribute("dur", DurUs); - J.attribute("name", "Total " + Total.first); - J.attributeObject("args", [&] { - J.attribute("count", int64_t(Count)); - J.attribute("avg ms", int64_t(DurUs / Count / 1000)); - }); - }); + ++TotalTid; + } - ++TotalTid; - } - - auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, - StringRef arg) { - J.object([&] { - J.attribute("cat", ""); - J.attribute("pid", Pid); - J.attribute("tid", int64_t(Tid)); - J.attribute("ts", 0); - J.attribute("ph", "M"); - J.attribute("name", Name); - J.attributeObject("args", [&] { J.attribute("name", arg); }); - }); - }; + auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, StringRef arg) { + J.object([&] { + J.attribute("cat", ""); + J.attribute("pid", Pid); + J.attribute("tid", int64_t(Tid)); + J.attribute("ts", 0); + J.attribute("ph", "M"); + J.attribute("name", Name); + J.attributeObject("args", [&] { J.attribute("name", arg); }); + }); + }; - writeMetadataEvent("process_name", Tid, ProcName); - writeMetadataEvent("thread_name", Tid, ThreadName); - for (const TimeTraceProfiler *TTP : Instances.List) - writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); + writeMetadataEvent("process_name", Tid, ProcName); + writeMetadataEvent("thread_name", Tid, ThreadName); + for (const TimeTraceProfiler *TTP : Instances.List) + writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); - J.arrayEnd(); - J.attributeEnd(); + J.arrayEnd(); + J.attributeEnd(); - // Emit the absolute time when this TimeProfiler started. - // This can be used to combine the profiling data from - // multiple processes and preserve actual time intervals. - J.attribute("beginningOfTime", - time_point_cast(BeginningOfTime) - .time_since_epoch() - .count()); + // Emit the absolute time when this TimeProfiler started. + // This can be used to combine the profiling data from + // multiple processes and preserve actual time intervals. + J.attribute("beginningOfTime", time_point_cast(BeginningOfTime) + .time_since_epoch() + .count()); - J.objectEnd(); - } + J.objectEnd(); +} - SmallVector Stack; - SmallVector Entries; - StringMap CountAndTotalPerName; - // System clock time when the session was begun. - const time_point BeginningOfTime; - // Profiling clock time when the session was begun. - const TimePointType StartTime; - const std::string ProcName; - const sys::Process::Pid Pid; - SmallString<0> ThreadName; - const uint64_t Tid; - - // Minimum time granularity (in microseconds) - const unsigned TimeTraceGranularity; -}; +TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { + return Instances; +} +} // namespace llvm void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, StringRef ProcName) {