Index: mlir/include/mlir/Pass/Pass.h =================================================================== --- mlir/include/mlir/Pass/Pass.h +++ mlir/include/mlir/Pass/Pass.h @@ -149,6 +149,17 @@ ArrayRef getStatistics() const { return statistics; } MutableArrayRef getStatistics() { return statistics; } + /// Returns the prototype of this pass. + /// + /// If this pass was created through `clonePass()`, returns the original pass + /// it was cloned from. This is useful for diagnostic purposes to distinguish + /// passes that were replicated internally in the pass manager (e.g., for + /// threading purposes) from passes purposefully instantiated by the user. + /// Used to collapse passes in timing statistics. + const Pass *getPrototype() const { + return passPrototype ? passPrototype : this; + } + protected: explicit Pass(TypeID passID, Optional opName = llvm::None) : passID(passID), opName(opName) {} @@ -184,6 +195,7 @@ /// A clone method to create a copy of this pass. std::unique_ptr clone() const { auto newInst = clonePass(); + newInst->passPrototype = getPrototype(); newInst->copyOptionValuesFrom(this); return newInst; } @@ -296,6 +308,10 @@ /// The pass options registered to this pass instance. detail::PassOptions passOptions; + /// A pointer to the pass this pass was cloned from, or `nullptr` if the pass + /// is an original. + const Pass *passPrototype = nullptr; + /// Allow access to 'clone'. friend class OpPassManager; Index: mlir/include/mlir/Pass/PassManager.h =================================================================== --- mlir/include/mlir/Pass/PassManager.h +++ mlir/include/mlir/Pass/PassManager.h @@ -12,6 +12,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OperationSupport.h" #include "mlir/Support/LogicalResult.h" +#include "mlir/Support/Timing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" @@ -150,18 +151,7 @@ // PassManager //===----------------------------------------------------------------------===// -/// An enum describing the different display modes for the information within -/// the pass manager. -enum class PassDisplayMode { - // In this mode the results are displayed in a list sorted by total, - // with each pass/analysis instance aggregated into one unique result. - List, - - // In this mode the results are displayed in a nested pipeline view that - // mirrors the internal pass pipeline that is being executed in the pass - // manager. - Pipeline, -}; +using PassDisplayMode = TimingDisplayMode; /// The main pass manager and pipeline builder. class PassManager : public OpPassManager { @@ -313,38 +303,26 @@ //===--------------------------------------------------------------------===// // Pass Timing - /// A configuration struct provided to the pass timing feature. - class PassTimingConfig { - public: - using PrintCallbackFn = function_ref; - - /// Initialize the configuration. - /// * 'displayMode' switch between list or pipeline display (see the - /// `PassDisplayMode` enum documentation). - explicit PassTimingConfig( - PassDisplayMode displayMode = PassDisplayMode::Pipeline) - : displayMode(displayMode) {} - - virtual ~PassTimingConfig(); - - /// A hook that may be overridden by a derived config to control the - /// printing. The callback is supplied by the framework and the config is - /// responsible to call it back with a stream for the output. - virtual void printTiming(PrintCallbackFn printCallback); - - /// Return the `PassDisplayMode` this config was created with. - PassDisplayMode getDisplayMode() { return displayMode; } + using PassTimingConfig = TimingConfig; - private: - PassDisplayMode displayMode; - }; + /// Add an instrumentation to time the execution of passes and the computation + /// of analyses. This function creates a temporary timing manager with the + /// given optional `config`. An execution time report will be printed as soon + /// as this `PassManager` is destroyed. + /// + /// Note: Timing should be enabled after all other instrumentations to avoid + /// any potential "ghost" timing from other instrumentations being + /// unintentionally included in the timing results. + void enableTiming(std::unique_ptr config = nullptr); /// Add an instrumentation to time the execution of passes and the computation - /// of analyses. + /// of analyses. Timing will be reported by nesting timers into the provided + /// `timingScope`. + /// /// Note: Timing should be enabled after all other instrumentations to avoid /// any potential "ghost" timing from other instrumentations being /// unintentionally included in the timing results. - void enableTiming(std::unique_ptr config = nullptr); + void enableTiming(TimingScope &timingScope); /// Prompts the pass manager to print the statistics collected for each of the /// held passes after each call to 'run'. @@ -368,6 +346,16 @@ /// Flag that specifies if pass statistics should be dumped. Optional passStatisticsMode; + /// Timing manager owned by this pass manager (in case timing is enabled by + /// the user without providing an external timing manager). This *must* appear + /// before the instrumentor to ensure the timing manager is destroyed *after* + /// the instrumentor, since the latter may hold timer pointers into the + /// former. + std::unique_ptr ownedTimingManager; + + /// See `ownedTimingManager`. + TimingScope ownedTimingScope; + /// A manager for pass instrumentations. std::unique_ptr instrumentor; Index: mlir/include/mlir/Support/Timing.h =================================================================== --- /dev/null +++ mlir/include/mlir/Support/Timing.h @@ -0,0 +1,243 @@ +//===- Timing.h - Execution time measurement facilities ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Facilities to measure and provide statistics on execution time. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_TIMING_H +#define MLIR_SUPPORT_TIMING_H + +#include "mlir/IR/Identifier.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include + +namespace mlir { + +class TimerBase { +public: + /// Start the timer. + virtual void start() = 0; + + /// Stop the timer. + virtual void stop() = 0; + + /// Hide the timer in timing reports and directly show its children. Optional. + /// May be unsupported by implementations. + virtual void setTransparent(bool transparent) {} + + /// Create a child timer nested within this one. Multiple calls to this + /// function with the same unique identifier `id` will return the same child + /// timer. + /// + /// This function can be called from other threads, as long as this timer + /// outlives any uses of the child timer on the other thread. + virtual TimerBase *nest(const void *id, + std::function &&nameBuilder) = 0; + + /// See above. + TimerBase *nest(Identifier name) { + return nest(name.getAsOpaquePointer(), [=]() { return name.str(); }); + } + + /// See above. + TimerBase *nest(StringRef name, MLIRContext *context) { + return nest(Identifier::get(name, context)); + } +}; + +/// An RAII-style wrapper around a timer that ensures the timer is properly +/// started and stopped. +class TimingScope { + TimerBase *timer; + +public: + TimingScope() : timer(nullptr) {} + TimingScope(TimerBase *timer) : timer(timer) { + if (timer) + timer->start(); + } + TimingScope(TimingScope &&other) : timer(other.release()) {} + ~TimingScope() { stop(); } + + TimingScope &operator=(TimingScope &&other) { + stop(); + timer = other.release(); + return *this; + } + + // Disable copying of the `TimingScope`. + TimingScope(const TimingScope &) = delete; + TimingScope &operator=(const TimingScope &) = delete; + + /// Manually stop the timer early. + void stop() { + if (timer) + timer->stop(); + timer = nullptr; + } + + /// Hide the timer in timing reports and directly show its children. + void setTransparent(bool transparent) { + if (timer) + timer->setTransparent(transparent); + } + + /// Create a nested timing scope. + /// + /// This returns a new `TimingScope` with a timer nested within the current + /// scope. In this fashion, the time in this scope may be further subdivided + /// in a more fine-grained fashion. + template + TimingScope nest(Args... args) { + return TimingScope(timer ? timer->nest(std::forward(args)...) + : nullptr); + } + + /// Implicit call to `bool` to check if the timing scope actually contains a + /// valid timer. + operator bool() const { return timer != nullptr; } + +private: + /// Release control of the timer in this scope, without stopping it. + TimerBase *release() { + auto t = timer; + timer = nullptr; + return t; + } +}; + +/// An enum describing the different display modes for the information within +/// the timing manager. +enum class TimingDisplayMode { + /// In this mode the results are displayed in a list sorted by total time, + /// with timers aggregated into one unique result per timer name. + List, + + // In this mode the results are displayed in a nested pipeline view that + // resembles the internal pass pipeline that is being executed in a pass + // manager. + Pipeline, +}; + +/// A configuration struct provided to the timing facilities. +class TimingConfig { +public: + using PrintCallbackFn = llvm::function_ref; + + /// Initialize the configuration. + /// * 'displayMode' switches between list or pipeline display (see the + /// `TimingDisplayMode` enum documentation). + explicit TimingConfig( + TimingDisplayMode displayMode = TimingDisplayMode::Pipeline) + : displayMode(displayMode) {} + + virtual ~TimingConfig(); + + /// A hook that may be overridden by a derived config to control printing of + /// timing results. The callback is supplied by the framework and the config + /// is responsible to call it back with a stream for the output. The default + /// implementation uses `llvm::CreateInfoOutputFile()` as stream. + virtual void printTiming(PrintCallbackFn printCallback); + + /// Return the `TimingDisplayMode` this config was created with. + TimingDisplayMode getDisplayMode() { return displayMode; } + +private: + TimingDisplayMode displayMode; +}; + +/// An abstract interface to an object that can be used for execution time +/// measurement. +class TimingManagerBase { +public: + /// Get the root timer of this timing manager. The returned timer must be + /// started and stopped manually. Execution time can be measured by nesting + /// timers within the root timer and starting/stopping them as appropriate. + /// Use this function only if you need access to the timer itself. Otherwise + /// consider the more convenient `getRootScope()` which offers an RAII-style + /// wrapper around the timer. This function returns a `nullptr` if timing is + /// disabled. + virtual TimerBase *getRootTimer() = 0; + + /// Get the root timer of this timing manager wrapped in a `TimingScope` for + /// convenience. Automatically starts the timer and stops it as soon as the + /// `TimingScope` goes out of scope (is destroyed). + virtual TimingScope getRootScope() { return TimingScope(getRootTimer()); } + + /// Enable execution time sampling. + virtual void enableTiming(std::unique_ptr config = nullptr) = 0; +}; + +/// A manager for execution time measurement and report printing. +/// +/// This is MLIR's default implementation of a `TimingManagerBase`. Prints an +/// execution time report upon destruction, or manually through `print()`. Use +/// a `TimingConfig` to control the formatting of the report and the stream to +/// which it is printed. Use `getRootScope()` for the most convenient way to +/// measure execution time. Library code should accept a `TimingManagerBase` or +/// `TimerBase` rather than this concrete implementation to allow the user to +/// substitute their own implementation. +class TimingManager : public TimingManagerBase { +public: + TimingManager(); + TimingManager(TimingManager &&rhs); + ~TimingManager(); + + // Disable copying of the `TimingManager`. + TimingManager(const TimingManager &rhs) = delete; + TimingManager &operator=(const TimingManager &rhs) = delete; + + virtual TimerBase *getRootTimer() override; + + /// Enable execution time sampling. + virtual void + enableTiming(std::unique_ptr config = nullptr) override; + + /// Print and clear the timing results. Only call this when there are no more + /// references to nested timers around, as printing post-processes and clears + /// the timers. + void print(); + + /// Clear the timing results. Only call this when there are no more references + /// to nested timers around, as clearing invalidates them. + void clear(); + + /// Debug print the timer data structures to an output stream. + void dumpTimers(llvm::raw_ostream &os = llvm::errs()); + + /// Debug print the timers as a list. Only call this when there are no more + /// references to nested timers around. + void dumpAsList(llvm::raw_ostream &os = llvm::errs()); + + /// Debug print the timers as a pipeline. Only call this when there are no + /// more references to nested timers around. + void dumpAsPipeline(llvm::raw_ostream &os = llvm::errs()); + +private: + class Timer; + std::unique_ptr rootTimer; + std::unique_ptr config; +}; + +/// Register a set of useful command-line options that can be used to configure +/// a timing manager. The values of these options can be applied via the +/// `applyTimingManagerCLOptions` method below. +void registerTimingManagerCLOptions(); + +/// Apply any values provided to the timing manager options that were registered +/// with 'registerTimingManagerOptions'. +void applyTimingManagerCLOptions(TimingManagerBase &tm); + +} // namespace mlir + +#endif // MLIR_SUPPORT_TIMING_H Index: mlir/lib/Pass/PassTiming.cpp =================================================================== --- mlir/lib/Pass/PassTiming.cpp +++ mlir/lib/Pass/PassTiming.cpp @@ -8,453 +8,121 @@ #include "PassDetail.h" #include "mlir/Pass/PassManager.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" + #include using namespace mlir; using namespace mlir::detail; -constexpr StringLiteral kPassTimingDescription = - "... Pass execution timing report ..."; +//===----------------------------------------------------------------------===// +// PassTiming +//===----------------------------------------------------------------------===// namespace { -/// Simple record class to record timing information. -struct TimeRecord { - TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} - - TimeRecord &operator+=(const TimeRecord &other) { - wall += other.wall; - user += other.user; - return *this; - } - - /// Print the current time record to 'os', with a breakdown showing - /// contributions to the give 'total' time record. - void print(raw_ostream &os, const TimeRecord &total) { - if (total.user != total.wall) - os << llvm::format(" %7.4f (%5.1f%%) ", user, - 100.0 * user / total.user); - os << llvm::format(" %7.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); - } - - double wall, user; -}; - -/// An enumeration of the different types of timers. -enum class TimerKind { - /// This timer represents an ordered collection of pass timers, corresponding - /// to a pass pipeline. - Pipeline, +struct PassTiming : public PassInstrumentation { + PassTiming(TimingScope &timingScope) : rootScope(timingScope) {} - /// This timer represents a collection of pipeline timers. - PipelineCollection, + /// If a pass can spawn additional work on other threads, it records the + /// index to its currently active timer here. Passes that run on a + /// newly-forked thread will check this list to find the active timer of the + /// parent thread into which the new thread should be nested. + DenseMap parentTimerIndices; - /// This timer represents an analysis or pass timer. - PassOrAnalysis -}; + /// A stack of the currently active timing scopes per thread. + DenseMap> activeThreadTimers; -struct Timer { - explicit Timer(std::string &&name, TimerKind kind) - : name(std::move(name)), kind(kind) {} + /// The root timing scope into which timing is reported. + TimingScope &rootScope; - /// Start the timer. - void start() { startTime = std::chrono::system_clock::now(); } + //===--------------------------------------------------------------------===// + // Pipeline + //===--------------------------------------------------------------------===// - /// Stop the timer. - void stop() { - auto newTime = std::chrono::system_clock::now() - startTime; - wallTime += newTime; - userTime += newTime; - } - - /// Get or create a child timer with the provided name and id. - Timer *getChildTimer(const void *id, TimerKind kind, - std::function &&nameBuilder) { - auto &child = children[id]; - if (!child) - child = std::make_unique(nameBuilder(), kind); - return child.get(); - } + void runBeforePipeline(Identifier name, + const PipelineParentInfo &parentInfo) override { + auto tid = llvm::get_threadid(); + auto &activeTimers = activeThreadTimers[tid]; - /// Returns the total time for this timer in seconds. - TimeRecord getTotalTime() { - // If this is a pass or analysis timer, use the recorded time directly. - if (kind == TimerKind::PassOrAnalysis) { - return TimeRecord( - std::chrono::duration_cast>(wallTime) - .count(), - std::chrono::duration_cast>(userTime) - .count()); + TimingScope *parentScope; + if (activeTimers.empty()) { + auto it = parentTimerIndices.find(parentInfo); + if (it != parentTimerIndices.end()) + parentScope = + &activeThreadTimers[parentInfo.parentThreadID][it->second]; + else + parentScope = &rootScope; + } else { + parentScope = &activeTimers.back(); } - - // Otherwise, accumulate the timing from each of the children. - TimeRecord totalTime; - for (auto &child : children) - totalTime += child.second->getTotalTime(); - return totalTime; + activeTimers.push_back(parentScope->nest(name.getAsOpaquePointer(), [name] { + return ("'" + name.strref() + "' Pipeline").str(); + })); } - /// A map of unique identifiers to child timers. - using ChildrenMap = llvm::MapVector>; - - /// Merge the timing data from 'other' into this timer. - void merge(Timer &&other) { - if (wallTime < other.wallTime) - wallTime = other.wallTime; - userTime += other.userTime; - mergeChildren(std::move(other.children)); + void runAfterPipeline(Identifier, const PipelineParentInfo &) override { + auto &activeTimers = activeThreadTimers[llvm::get_threadid()]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - /// Merge the timer children in 'otherChildren' with the children of this - /// timer. - void mergeChildren(ChildrenMap &&otherChildren) { - // Check for an empty children list. - if (children.empty()) { - children = std::move(otherChildren); - return; - } + //===--------------------------------------------------------------------===// + // Pass + //===--------------------------------------------------------------------===// - // Pipeline merges are handled separately as the children are merged - // lexicographically. - if (kind == TimerKind::Pipeline) { - assert(children.size() == otherChildren.size() && - "pipeline merge requires the same number of children"); - for (auto it : llvm::zip(children, otherChildren)) - std::get<0>(it).second->merge(std::move(*std::get<1>(it).second)); - return; + void runBeforePass(Pass *pass, Operation *) override { + auto tid = llvm::get_threadid(); + auto &activeTimers = activeThreadTimers[tid]; + auto &parentScope = activeTimers.empty() ? rootScope : activeTimers.back(); + + if (auto *adaptor = dyn_cast(pass)) { + parentTimerIndices[{tid, pass}] = activeTimers.size(); + auto scope = parentScope.nest(pass->getPrototype(), [adaptor]() { + return adaptor->getAdaptorName(); + }); + scope.setTransparent(adaptor->getPassManagers().size() <= 1); + activeTimers.push_back(std::move(scope)); + } else { + activeTimers.push_back(parentScope.nest(pass->getPrototype(), [pass]() { + return std::string(pass->getName()); + })); } - - // Otherwise, we merge children based upon their timer key. - for (auto &otherChild : otherChildren) - mergeChild(std::move(otherChild)); } - /// Merge in the given child timer and id into this timer. - void mergeChild(ChildrenMap::value_type &&childIt) { - auto &child = children[childIt.first]; - if (!child) - child = std::move(childIt.second); - else - child->merge(std::move(*childIt.second)); + void runAfterPass(Pass *pass, Operation *) override { + auto tid = llvm::get_threadid(); + if (isa(pass)) { + parentTimerIndices.erase({tid, pass}); + } + auto &activeTimers = activeThreadTimers[tid]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - /// Raw timing information. - std::chrono::time_point startTime; - std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); - std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); - - /// A map of unique identifiers to child timers. - ChildrenMap children; - - /// A descriptive name for this timer. - std::string name; - - /// The type of timer this instance represents. - TimerKind kind; -}; - -struct PassTiming : public PassInstrumentation { - PassTiming(std::unique_ptr config) - : config(std::move(config)) {} - ~PassTiming() override { print(); } - - /// Setup the instrumentation hooks. - void runBeforePipeline(Identifier name, - const PipelineParentInfo &parentInfo) override; - void runAfterPipeline(Identifier name, - const PipelineParentInfo &parentInfo) override; - void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } - void runAfterPass(Pass *pass, Operation *) override; void runAfterPassFailed(Pass *pass, Operation *op) override { runAfterPass(pass, op); } - void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override { - startAnalysisTimer(name, id); - } - void runAfterAnalysis(StringRef, TypeID, Operation *) override; - - /// Print and clear the timing results. - void print(); - /// Start a new timer for the given pass. - void startPassTimer(Pass *pass); + //===--------------------------------------------------------------------===// + // Analysis + //===--------------------------------------------------------------------===// - /// Start a new timer for the given analysis. - void startAnalysisTimer(StringRef name, TypeID id); - - /// Pop the last active timer for the current thread. - Timer *popLastActiveTimer() { + void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override { auto tid = llvm::get_threadid(); auto &activeTimers = activeThreadTimers[tid]; - assert(!activeTimers.empty() && "expected active timer"); - return activeTimers.pop_back_val(); + auto &parentScope = activeTimers.empty() ? rootScope : activeTimers.back(); + activeTimers.push_back(parentScope.nest( + id.getAsOpaquePointer(), [name] { return "(A) " + name.str(); })); } - /// Print the timing result in list mode. - void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime); - - /// Print the timing result in pipeline mode. - void printResultsAsPipeline(raw_ostream &os, Timer *root, - TimeRecord totalTime); - - /// Returns a timer for the provided identifier and name. - Timer *getTimer(const void *id, TimerKind kind, - std::function &&nameBuilder) { - auto tid = llvm::get_threadid(); - - // If there is no active timer then add to the root timer. - auto &activeTimers = activeThreadTimers[tid]; - Timer *parentTimer; - if (activeTimers.empty()) { - auto &rootTimer = rootTimers[tid]; - if (!rootTimer) - rootTimer = std::make_unique("root", TimerKind::Pipeline); - parentTimer = rootTimer.get(); - } else { - // Otherwise, add this to the active timer. - parentTimer = activeTimers.back(); - } - - auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder)); - activeTimers.push_back(timer); - return timer; + void runAfterAnalysis(StringRef, TypeID, Operation *) override { + auto &activeTimers = activeThreadTimers[llvm::get_threadid()]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - - /// The root top level timers for each thread. - DenseMap> rootTimers; - - /// A stack of the currently active pass timers per thread. - DenseMap> activeThreadTimers; - - /// The configuration object to use when printing the timing results. - std::unique_ptr config; - - /// A mapping of pipeline timers that need to be merged into the parent - /// collection. The timers are mapped to the parent info to merge into. - DenseMap> - pipelinesToMerge; }; -} // end anonymous namespace - -void PassTiming::runBeforePipeline(Identifier name, - const PipelineParentInfo &parentInfo) { - // We don't actually want to time the pipelines, they gather their total - // from their held passes. - getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, - [&] { return ("'" + name.strref() + "' Pipeline").str(); }); -} - -void PassTiming::runAfterPipeline(Identifier name, - const PipelineParentInfo &parentInfo) { - // Pop the timer for the pipeline. - auto tid = llvm::get_threadid(); - auto &activeTimers = activeThreadTimers[tid]; - assert(!activeTimers.empty() && "expected active timer"); - activeTimers.pop_back(); - - // If the current thread is the same as the parent, there is nothing left to - // do. - if (tid == parentInfo.parentThreadID) - return; - - // Otherwise, mark the pipeline timer for merging into the correct parent - // thread. - assert(activeTimers.empty() && "expected parent timer to be root"); - auto *parentTimer = rootTimers[tid].get(); - assert(parentTimer->children.size() == 1 && - parentTimer->children.count(name.getAsOpaquePointer()) && - "expected a single pipeline timer"); - pipelinesToMerge[parentInfo].push_back( - std::move(*parentTimer->children.begin())); - rootTimers.erase(tid); -} - -/// Start a new timer for the given pass. -void PassTiming::startPassTimer(Pass *pass) { - auto kind = isa(pass) ? TimerKind::PipelineCollection - : TimerKind::PassOrAnalysis; - Timer *timer = getTimer(pass, kind, [pass]() -> std::string { - if (auto *adaptor = dyn_cast(pass)) - return adaptor->getAdaptorName(); - return std::string(pass->getName()); - }); - - // We don't actually want to time the adaptor passes, they gather their total - // from their held passes. - if (!isa(pass)) - timer->start(); -} - -/// Start a new timer for the given analysis. -void PassTiming::startAnalysisTimer(StringRef name, TypeID id) { - Timer *timer = getTimer(id.getAsOpaquePointer(), TimerKind::PassOrAnalysis, - [name] { return "(A) " + name.str(); }); - timer->start(); -} - -/// Stop a pass timer. -void PassTiming::runAfterPass(Pass *pass, Operation *) { - Timer *timer = popLastActiveTimer(); - - // Check to see if we need to merge in the timing data for the pipelines - // running on other threads. - auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass}); - if (toMerge != pipelinesToMerge.end()) { - for (auto &it : toMerge->second) - timer->mergeChild(std::move(it)); - pipelinesToMerge.erase(toMerge); - } - - timer->stop(); -} - -/// Stop a timer. -void PassTiming::runAfterAnalysis(StringRef, TypeID, Operation *) { - popLastActiveTimer()->stop(); -} - -/// Utility to print the timer heading information. -static void printTimerHeader(raw_ostream &os, TimeRecord total) { - os << "===" << std::string(73, '-') << "===\n"; - // Figure out how many spaces to description name. - unsigned padding = (80 - kPassTimingDescription.size()) / 2; - os.indent(padding) << kPassTimingDescription << '\n'; - os << "===" << std::string(73, '-') << "===\n"; - - // Print the total time followed by the section headers. - os << llvm::format(" Total Execution Time: %5.4f seconds\n\n", total.wall); - if (total.user != total.wall) - os << " ---User Time---"; - os << " ---Wall Time--- --- Name ---\n"; -} - -/// Utility to print a single line entry in the timer output. -static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, - TimeRecord time, TimeRecord totalTime) { - time.print(os, totalTime); - os.indent(indent) << name << "\n"; -} - -/// Print out the current timing information. -void PassTiming::print() { - // Don't print anything if there is no timing data. - if (rootTimers.empty()) - return; - - assert(rootTimers.size() == 1 && "expected one remaining root timer"); - - auto printCallback = [&](raw_ostream &os) { - auto &rootTimer = rootTimers.begin()->second; - // Print the timer header. - TimeRecord totalTime = rootTimer->getTotalTime(); - printTimerHeader(os, totalTime); - // Defer to a specialized printer for each display mode. - switch (config->getDisplayMode()) { - case PassDisplayMode::List: - printResultsAsList(os, rootTimer.get(), totalTime); - break; - case PassDisplayMode::Pipeline: - printResultsAsPipeline(os, rootTimer.get(), totalTime); - break; - } - printTimeEntry(os, 0, "Total", totalTime, totalTime); - os.flush(); - - // Reset root timers. - rootTimers.clear(); - activeThreadTimers.clear(); - }; - - config->printTiming(printCallback); -} - -// The default implementation for printTiming uses -// `llvm::CreateInfoOutputFile()` as stream, it can be overridden by clients -// to customize the output. -void PassManager::PassTimingConfig::printTiming(PrintCallbackFn printCallback) { - printCallback(*llvm::CreateInfoOutputFile()); -} - -/// Print the timing result in list mode. -void PassTiming::printResultsAsList(raw_ostream &os, Timer *root, - TimeRecord totalTime) { - llvm::StringMap mergedTimings; - - std::function addTimer = [&](Timer *timer) { - // Only add timing information for passes and analyses. - if (timer->kind == TimerKind::PassOrAnalysis) - mergedTimings[timer->name] += timer->getTotalTime(); - for (auto &children : timer->children) - addTimer(children.second.get()); - }; - - // Add each of the top level timers. - for (auto &topLevelTimer : root->children) - addTimer(topLevelTimer.second.get()); - - // Sort the timing information by wall time. - std::vector> timerNameAndTime; - for (auto &it : mergedTimings) - timerNameAndTime.emplace_back(it.first(), it.second); - llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), - [](const std::pair *lhs, - const std::pair *rhs) { - return llvm::array_pod_sort_comparator( - &rhs->second.wall, &lhs->second.wall); - }); - - // Print the timing information sequentially. - for (auto &timeData : timerNameAndTime) - printTimeEntry(os, 0, timeData.first, timeData.second, totalTime); -} - -/// Print the timing result in pipeline mode. -void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root, - TimeRecord totalTime) { - std::function printTimer = [&](unsigned indent, - Timer *timer) { - // If this is a timer for a pipeline collection and the collection only has - // one pipeline child, then only print the child. - if (timer->kind == TimerKind::PipelineCollection && - timer->children.size() == 1) - return printTimer(indent, timer->children.begin()->second.get()); - - printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime); - - // If this timer is a pipeline, then print the children in-order. - if (timer->kind == TimerKind::Pipeline) { - for (auto &child : timer->children) - printTimer(indent + 2, child.second.get()); - return; - } - - // Otherwise, sort the children by name to give a deterministic ordering - // when emitting the time. - SmallVector children; - children.reserve(timer->children.size()); - for (auto &child : timer->children) - children.push_back(child.second.get()); - llvm::array_pod_sort(children.begin(), children.end(), - [](Timer *const *lhs, Timer *const *rhs) { - return (*lhs)->name.compare((*rhs)->name); - }); - for (auto &child : children) - printTimer(indent + 2, child); - }; - - // Print each of the top level timers. - for (auto &topLevelTimer : root->children) - printTimer(0, topLevelTimer.second.get()); -} - -// Out-of-line as key function. -PassManager::PassTimingConfig::~PassTimingConfig() {} +} // namespace //===----------------------------------------------------------------------===// // PassManager @@ -463,11 +131,19 @@ /// Add an instrumentation to time the execution of passes and the computation /// of analyses. void PassManager::enableTiming(std::unique_ptr config) { - // Check if pass timing is already enabled. - if (passTiming) + if (passTiming || ownedTimingManager) + return; + ownedTimingManager = std::make_unique(); + ownedTimingManager->enableTiming(std::move(config)); + ownedTimingScope = ownedTimingManager->getRootScope(); + enableTiming(ownedTimingScope); +} + +/// Add an instrumentation to time the execution of passes and the computation +/// of analyses. +void PassManager::enableTiming(TimingScope &timingScope) { + if (passTiming || !timingScope) return; - if (!config) - config = std::make_unique(); - addInstrumentation(std::make_unique(std::move(config))); + addInstrumentation(std::make_unique(timingScope)); passTiming = true; } Index: mlir/lib/Support/CMakeLists.txt =================================================================== --- mlir/lib/Support/CMakeLists.txt +++ mlir/lib/Support/CMakeLists.txt @@ -4,6 +4,7 @@ IndentedOstream.cpp MlirOptMain.cpp StorageUniquer.cpp + Timing.cpp ToolUtilities.cpp ) @@ -11,6 +12,7 @@ DebugCounter.cpp FileUtilities.cpp StorageUniquer.cpp + Timing.cpp ToolUtilities.cpp ADDITIONAL_HEADER_DIRS Index: mlir/lib/Support/Timing.cpp =================================================================== --- /dev/null +++ mlir/lib/Support/Timing.cpp @@ -0,0 +1,451 @@ +//===- Timing.cpp - Execution time measurement facilities -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Facilities to measure and provide statistics on execution time. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/Timing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace mlir; +using llvm::StringRef; + +constexpr llvm::StringLiteral kTimingDescription = + "... Execution time report ..."; + +//===----------------------------------------------------------------------===// +// TimingConfig +//===----------------------------------------------------------------------===// + +// Out-of-line as key function. +TimingConfig::~TimingConfig() {} + +// The default implementation of the printing hook. +void TimingConfig::printTiming(PrintCallbackFn printCallback) { + printCallback(*llvm::CreateInfoOutputFile()); +} + +//===----------------------------------------------------------------------===// +// Timer +//===----------------------------------------------------------------------===// + +namespace { + +/// Simple record class to record timing information. +struct TimeRecord { + TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} + + TimeRecord &operator+=(const TimeRecord &other) { + wall += other.wall; + user += other.user; + return *this; + } + + TimeRecord &operator-=(const TimeRecord &other) { + wall -= other.wall; + user -= other.user; + return *this; + } + + /// Print the current time record to 'os', with a breakdown showing + /// contributions to the give 'total' time record. + void print(llvm::raw_ostream &os, const TimeRecord &total) { + if (total.user != total.wall) + os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user); + os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); + } + + double wall, user; +}; + +} // namespace + +/// Utility to print a single line entry in the timer output. +static void printTimeEntry(llvm::raw_ostream &os, unsigned indent, + StringRef name, TimeRecord time, TimeRecord total) { + time.print(os, total); + os.indent(indent) << name << "\n"; +} + +/// Utility to print the timer heading information. +static void printTimeHeader(llvm::raw_ostream &os, TimeRecord total) { + // Figure out how many spaces to description name. + unsigned padding = (80 - kTimingDescription.size()) / 2; + os << "===" << std::string(73, '-') << "===\n"; + os.indent(padding) << kTimingDescription << '\n'; + os << "===" << std::string(73, '-') << "===\n"; + + // Print the total time followed by the section headers. + os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall); + if (total.user != total.wall) + os << " ----User Time----"; + os << " ----Wall Time---- ----Name----\n"; +} + +/// A timer used to sample execution time. +/// +/// Separately tracks wall time and user time to account for parallel threads of +/// execution. Timers are intended to be started and stopped multiple times. +/// Each start and stop will add to the timer's wall and user time. +struct TimingManager::Timer : public TimerBase { + using ChildrenMap = llvm::MapVector>; + using AsyncChildrenMap = llvm::DenseMap; + + Timer(std::string &&name) : threadId(llvm::get_threadid()), name(name) {} + + /// Start the timer. + virtual void start() override { + startTime = std::chrono::system_clock::now(); + } + + /// Stop the timer. + virtual void stop() override { + auto newTime = std::chrono::system_clock::now() - startTime; + wallTime += newTime; + userTime += newTime; + } + + /// Hide the timer in timing reports and directly show its children. + virtual void setTransparent(bool x) override { transparent = x; } + + /// Create a child timer nested within this one. Multiple calls to this + /// function with the same unique identifier `id` will return the same child + /// timer. + /// + /// This function can be called from other threads, as long as this timer + /// outlives any uses of the child timer on the other thread. + virtual TimerBase *nest(const void *id, + std::function &&nameBuilder) override { + auto tid = llvm::get_threadid(); + if (tid == threadId) { + return nestTail(children[id], std::move(nameBuilder)); + } else { + std::unique_lock lock(asyncMutex); + return nestTail(asyncChildren[tid][id], std::move(nameBuilder)); + } + } + + /// Tail-called from `nest()`. + TimerBase *nestTail(std::unique_ptr &child, + std::function &&nameBuilder) { + if (!child) + child = std::make_unique(nameBuilder()); + return child.get(); + } + + /// Finalize this timer and all its children. + /// + /// If this timer has async children, which happens if `nest()` was called + /// from another thread, this function merges the async childr timers into the + /// main list of child timers. + /// + /// Caution: Call this function only after all nested timers running on other + /// threads no longer need their timers! + void finalize() { + addAsyncUserTime(); + mergeAsyncChildren(); + } + + /// Add the user time of all async children to this timer's user time. This is + /// necessary since the user time already contains all regular child timers, + /// but not the asynchronous ones (by the nesting nature of the timers). + std::chrono::nanoseconds addAsyncUserTime() { + auto added = std::chrono::nanoseconds(0); + for (auto &child : children) { + added += child.second->addAsyncUserTime(); + } + for (auto &thread : asyncChildren) { + for (auto &child : thread.second) { + child.second->addAsyncUserTime(); + added += child.second->userTime; + } + } + userTime += added; + return added; + } + + /// Ensure that this timer and recursively all its children have their async + /// children folded into the main map of children. + void mergeAsyncChildren() { + for (auto &child : children) + child.second->mergeAsyncChildren(); + mergeChildren(std::move(asyncChildren)); + assert(asyncChildren.empty()); + } + + /// Merge multiple child timers into this timer. + /// + /// Children in `other` are added as children to this timer, or, if this timer + /// already contains a child with the corresponding unique identifier, are + /// merged into the existing child. + void mergeChildren(ChildrenMap &&other) { + if (children.empty()) { + children = std::move(other); + for (auto &child : other) + child.second->mergeAsyncChildren(); + } else { + for (auto &child : other) + mergeChild(child.first, std::move(child.second)); + other.clear(); + } + } + + /// See above. + void mergeChildren(AsyncChildrenMap &&other) { + for (auto &thread : other) { + mergeChildren(std::move(thread.second)); + assert(thread.second.empty()); + } + other.clear(); + } + + /// Merge a child timer into this timer for a given unique identifier. + /// + /// Moves all child and async child timers of `other` into this timer's child + /// for the given unique identifier. + void mergeChild(const void *id, std::unique_ptr &&other) { + auto &into = children[id]; + if (!into) { + into = std::move(other); + into->mergeAsyncChildren(); + } else { + into->wallTime = std::max(into->wallTime, other->wallTime); + into->userTime += other->userTime; + into->mergeChildren(std::move(other->children)); + into->mergeChildren(std::move(other->asyncChildren)); + other.reset(); + } + } + + /// Dump a human-readable tree representation of the timer and its children. + /// This is useful for debugging the timing mechanisms and structure of the + /// timers. + void dump(llvm::raw_ostream &os, unsigned indent = 0, + unsigned markThreadId = 0) { + auto time = getTimeRecord(); + os << std::string(indent * 2, ' ') << name << " [" << threadId << "]" + << llvm::format(" %7.4f / %7.4f", time.user, time.wall); + if (threadId != markThreadId && markThreadId != 0) + os << " (*)"; + os << "\n"; + for (auto &child : children) + child.second->dump(os, indent + 1, threadId); + for (auto &thread : asyncChildren) + for (auto &child : thread.second) + child.second->dump(os, indent + 1, threadId); + } + + /// Returns the time for this timer in seconds. + TimeRecord getTimeRecord() { + return TimeRecord( + std::chrono::duration_cast>(wallTime) + .count(), + std::chrono::duration_cast>(userTime) + .count()); + } + + /// Print the timing result in list mode. + void printAsList(llvm::raw_ostream &os, TimeRecord total) { + // Flatten the leaf timers in the tree and merge them by name. + llvm::StringMap mergedTimers; + std::function addTimer = [&](Timer *timer) { + if (timer->children.empty()) + mergedTimers[timer->name] += timer->getTimeRecord(); + else + for (auto &children : timer->children) + addTimer(children.second.get()); + }; + addTimer(this); + + // Sort the timing information by wall time. + std::vector> timerNameAndTime; + for (auto &it : mergedTimers) + timerNameAndTime.emplace_back(it.first(), it.second); + llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), + [](const std::pair *lhs, + const std::pair *rhs) { + return llvm::array_pod_sort_comparator( + &rhs->second.wall, &lhs->second.wall); + }); + + // Print the timing information sequentially. + for (auto &timeData : timerNameAndTime) + printTimeEntry(os, 0, timeData.first, timeData.second, total); + } + + /// Print the timing result in pipeline mode. + void printAsPipeline(llvm::raw_ostream &os, TimeRecord total, + unsigned indent = 0) { + unsigned childIndent = indent; + if (!transparent) { + printTimeEntry(os, indent, name, getTimeRecord(), total); + childIndent += 2; + } + for (auto &child : children) { + child.second->printAsPipeline(os, total, childIndent); + } + } + + /// Print out the current timing information. + void print(llvm::raw_ostream &os, TimingDisplayMode displayMode) { + // Print the banner. + auto total = getTimeRecord(); + printTimeHeader(os, total); + + // Defer to a specialized printer for each display mode. + switch (displayMode) { + case TimingDisplayMode::List: + printAsList(os, total); + break; + case TimingDisplayMode::Pipeline: + printAsPipeline(os, total); + break; + default: + llvm_unreachable("unhandled display mode"); + } + + // Print the top-level time not accounted for by child timers, and the + // total. + auto rest = total; + for (auto &child : children) + rest -= child.second->getTimeRecord(); + printTimeEntry(os, 0, "Rest", rest, total); + printTimeEntry(os, 0, "Total", total, total); + os.flush(); + } + + /// The last time instant at which the timer was started. + std::chrono::time_point startTime; + + /// Accumulated wall time. If multiple threads of execution are merged into + /// this timer, the wall time will hold the maximum wall time of each thread + /// of execution. + std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); + + /// Accumulated user time. If multiple threads of execution are merged into + /// this timer, each thread's user time is added here. + std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); + + /// The thread on which this timer is running. + uint64_t threadId; + + /// A descriptive name for this timer. + std::string name; + + /// Whether omit this timer from reports and directly show its children. + bool transparent = false; + + /// Child timers on the same thread the timer itself. We keep at most one + /// timer per unique identifier. + ChildrenMap children; + + /// Child timers on other threads. We keep at most one timer per unique + /// identifier. + AsyncChildrenMap asyncChildren; + + /// Mutex for the async children. + std::mutex asyncMutex; +}; + +//===----------------------------------------------------------------------===// +// TimingManager +//===----------------------------------------------------------------------===// + +TimingManager::TimingManager() { + clear(); // initializes the root timer +} + +TimingManager::~TimingManager() { print(); } + +TimerBase *TimingManager::getRootTimer() { + return config ? rootTimer.get() : nullptr; +} + +/// Enable execution time sampling. +void TimingManager::enableTiming(std::unique_ptr newConfig) { + if (!newConfig) + newConfig = std::make_unique(); + config = std::move(newConfig); +} + +/// Print and clear the timing results. +void TimingManager::print() { + if (config) { + rootTimer->finalize(); + config->printTiming([&](llvm::raw_ostream &os) { + rootTimer->print(os, config->getDisplayMode()); + }); + } + clear(); +} + +/// Clear the timing results. +void TimingManager::clear() { + rootTimer = std::make_unique("root"); + rootTimer->transparent = true; +} + +/// Debug print the timer data structures to an output stream. +void TimingManager::dumpTimers(llvm::raw_ostream &os) { rootTimer->dump(os); } + +/// Debug print the timers as a list. +void TimingManager::dumpAsList(llvm::raw_ostream &os) { + rootTimer->finalize(); + rootTimer->print(os, TimingDisplayMode::List); +} + +/// Debug print the timers as a pipeline. +void TimingManager::dumpAsPipeline(llvm::raw_ostream &os) { + rootTimer->finalize(); + rootTimer->print(os, TimingDisplayMode::Pipeline); +} + +//===----------------------------------------------------------------------===// +// TimingManagerOptions +//===----------------------------------------------------------------------===// + +namespace { +struct TimingManagerOptions { + llvm::cl::opt timing{"timing", + llvm::cl::desc("Display execution times")}; + llvm::cl::opt timingDisplayMode{ + "timing-display", llvm::cl::desc("Display method for timing data"), + llvm::cl::init(TimingDisplayMode::Pipeline), + llvm::cl::values( + clEnumValN(TimingDisplayMode::List, "list", + "display the results in a list sorted by total time"), + clEnumValN(TimingDisplayMode::Pipeline, "pipeline", + "display the results with a nested pipeline view"))}; +}; +} // end anonymous namespace + +static llvm::ManagedStatic options; + +void mlir::registerTimingManagerCLOptions() { + // Make sure that the options struct has been constructed. + *options; +} + +void mlir::applyTimingManagerCLOptions(TimingManagerBase &tm) { + if (!options.isConstructed()) + return; + if (options->timing) + tm.enableTiming(std::make_unique(options->timingDisplayMode)); +}