diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -975,7 +975,7 @@ passes are taking the most time to execute, as well as how much of an effect a pass has on the total execution time of the pipeline. Users can enable this instrumentation directly on the PassManager via `enableTiming`. This -instrumentation is also made available in mlir-opt via the `-pass-timing` flag. +instrumentation is also made available in mlir-opt via the `-mlir-timing` flag. The PassTiming instrumentation provides several different display modes for the timing results, each of which is described below: @@ -985,10 +985,10 @@ pass/analysis instance aggregated into one unique result. This view is useful for getting an overview of what analyses/passes are taking the most time in a pipeline. This display mode is available in mlir-opt via -`-pass-timing-display=list`. +`-mlir-timing-display=list`. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing -pass-timing-display=list +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -mlir-timing -mlir-timing-display=list ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1004,7 +1004,7 @@ 0.0084 (100.0%) Total ``` -##### Pipeline Display Mode +##### Tree Display Mode In this mode, the results are displayed in a nested pipeline view that mirrors the internal pass pipeline that is being executed in the pass manager. This view @@ -1013,7 +1013,7 @@ invalidated and recomputed. This is the default display mode. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1044,7 +1044,7 @@ cpu time. ```shell -$ mlir-opt foo.mlir -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing +$ mlir-opt foo.mlir -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h --- a/mlir/include/mlir/Pass/Pass.h +++ b/mlir/include/mlir/Pass/Pass.h @@ -145,6 +145,21 @@ ArrayRef getStatistics() const { return statistics; } MutableArrayRef getStatistics() { return statistics; } + /// Returns the thread sibling of this pass. + /// + /// If this pass was cloned by the pass manager for the sake of + /// multi-threading, this function returns the original pass it was cloned + /// from. This is useful for diagnostic purposes to distinguish passes that + /// were replicated for threading purposes from passes instantiated by the + /// user. Used to collapse passes in timing statistics. + const Pass *getThreadingSibling() const { return threadingSibling; } + + /// Returns the thread sibling of this pass, or the pass itself it has no + /// sibling. See `getThreadingSibling()` for details. + const Pass *getThreadingSiblingOrThis() const { + return threadingSibling ? threadingSibling : this; + } + protected: explicit Pass(TypeID passID, Optional opName = llvm::None) : passID(passID), opName(opName) {} @@ -292,6 +307,10 @@ /// The pass options registered to this pass instance. detail::PassOptions passOptions; + /// A pointer to the pass this pass was cloned from, if the clone was made by + /// the pass manager for the sake of multi-threading. + const Pass *threadingSibling = nullptr; + /// Allow access to 'clone'. friend class OpPassManager; diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -12,6 +12,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OperationSupport.h" #include "mlir/Support/LogicalResult.h" +#include "mlir/Support/Timing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" @@ -313,38 +314,36 @@ //===--------------------------------------------------------------------===// // Pass Timing - /// A configuration struct provided to the pass timing feature. - class PassTimingConfig { - public: - using PrintCallbackFn = function_ref; - - /// Initialize the configuration. - /// * 'displayMode' switch between list or pipeline display (see the - /// `PassDisplayMode` enum documentation). - explicit PassTimingConfig( - PassDisplayMode displayMode = PassDisplayMode::Pipeline) - : displayMode(displayMode) {} - - virtual ~PassTimingConfig(); - - /// A hook that may be overridden by a derived config to control the - /// printing. The callback is supplied by the framework and the config is - /// responsible to call it back with a stream for the output. - virtual void printTiming(PrintCallbackFn printCallback); - - /// Return the `PassDisplayMode` this config was created with. - PassDisplayMode getDisplayMode() { return displayMode; } + /// Add an instrumentation to time the execution of passes and the computation + /// of analyses. Timing will be reported by nesting timers into the provided + /// `timingScope`. + /// + /// Note: Timing should be enabled after all other instrumentations to avoid + /// any potential "ghost" timing from other instrumentations being + /// unintentionally included in the timing results. + void enableTiming(TimingScope &timingScope); - private: - PassDisplayMode displayMode; - }; + /// Add an instrumentation to time the execution of passes and the computation + /// of analyses. The pass manager will take ownership of the timing manager + /// passed to the function and timing will be reported by nesting timers into + /// the timing manager's root scope. + /// + /// Note: Timing should be enabled after all other instrumentations to avoid + /// any potential "ghost" timing from other instrumentations being + /// unintentionally included in the timing results. + void enableTiming(std::unique_ptr tm); /// Add an instrumentation to time the execution of passes and the computation - /// of analyses. + /// of analyses. Creates a temporary TimingManager owned by this PassManager + /// which will be used to report timing. + /// /// Note: Timing should be enabled after all other instrumentations to avoid /// any potential "ghost" timing from other instrumentations being /// unintentionally included in the timing results. - void enableTiming(std::unique_ptr config = nullptr); + void enableTiming(); + + //===--------------------------------------------------------------------===// + // Pass Statistics /// Prompts the pass manager to print the statistics collected for each of the /// held passes after each call to 'run'. @@ -395,6 +394,13 @@ /// Apply any values provided to the pass manager options that were registered /// with 'registerPassManagerOptions'. void applyPassManagerCLOptions(PassManager &pm); + +/// Apply any values provided to the timing manager options that were registered +/// with `registerDefaultTimingManagerOptions`. This is a handy helper function +/// if you do not want to bother creating your own timing manager and passing it +/// to the pass manager. +void applyDefaultTimingPassManagerCLOptions(PassManager &pm); + } // end namespace mlir #endif // MLIR_PASS_PASSMANAGER_H diff --git a/mlir/include/mlir/Support/Timing.h b/mlir/include/mlir/Support/Timing.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Support/Timing.h @@ -0,0 +1,424 @@ +//===- Timing.h - Execution time measurement facilities ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Facilities to measure and provide statistics on execution time. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_TIMING_H +#define MLIR_SUPPORT_TIMING_H + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMapEntry.h" +#include "llvm/Support/raw_ostream.h" + +namespace mlir { + +class Timer; +class TimingManager; +class TimingScope; +class DefaultTimingManager; +namespace detail { +class TimingManagerImpl; +class DefaultTimingManagerImpl; +} // namespace detail + +//===----------------------------------------------------------------------===// +// TimingIdentifier +//===----------------------------------------------------------------------===// + +/// This class represesents a uniqued string owned by a `TimingManager`. Most +/// importantly, instances of this class provide a stable opaque pointer that +/// is guaranteed to be reproduced by later interning of the same string. The +/// `TimingManager` uses this mechanism to provide timers with an opaque id +/// even when the user of the API merely provided a string as identification +/// (instead of a pass for example). +/// +/// This is a POD type with pointer size, so it should be passed around by +/// value. The underlying data is owned by the `TimingManager`. +class TimingIdentifier { + using EntryType = llvm::StringMapEntry; + +public: + TimingIdentifier(const TimingIdentifier &) = default; + TimingIdentifier &operator=(const TimingIdentifier &other) = default; + + /// Return an identifier for the specified string. + static TimingIdentifier get(StringRef str, TimingManager &tm); + + /// Return a `StringRef` for the string. + StringRef strref() const { return entry->first(); } + + /// Return an `std::string`. + std::string str() const { return strref().str(); } + + /// Return the opaque pointer that corresponds to this identifier. + const void *getAsOpaquePointer() const { + return static_cast(entry); + } + +private: + const EntryType *entry; + explicit TimingIdentifier(const EntryType *entry) : entry(entry) {} +}; + +//===----------------------------------------------------------------------===// +// TimingManager +//===----------------------------------------------------------------------===// + +/// This class represents facilities to measure execution time. +/// +/// Libraries and infrastructure code operate on opque `Timer` handles returned +/// by various functions of this manager. Timers are started and stopped to +/// demarcate regions in the code where execution time is of interest, and they +/// can be nested to provide more detailed timing resolution. Calls to the timer +/// start, stop, and nesting functions must be balanced. To facilitate this, +/// users are encouraged to leverage the `TimingScope` RAII-style wrapper around +/// `Timer`s. +/// +/// Users can provide their own implementation of `TimingManager`, or use the +/// default `DefaultTimingManager` implementation in MLIR. Implementations +/// override the various protected virtual functions to create, nest, start, and +/// stop timers. A common pattern is for subclasses to provide a custom timer +/// class and simply pass pointers to instances of this class around as the +/// opaque timer handle. The manager itself can then forward callbacks to the +/// this class. Alternatively, external timing libraries may return their own +/// opaque handles for timing scopes. +/// +/// For example: +/// ``` +/// void doWork(TimingManager &tm) { +/// auto root = tm.getRootScope(); +/// +/// { +/// auto scope = root.nest("First"); +/// doSomeWork(); +/// // <-- "First" timer stops here +/// } +/// +/// auto scope = root.nest("Second"); +/// doEvenMoreWork(); +/// scope.stop(); // <-- "Second" timer stops here +/// +/// // <-- Root timer stops here +/// } +/// ``` +class TimingManager { +public: + explicit TimingManager(); + virtual ~TimingManager(); + + /// Get the root timer of this timing manager. The returned timer must be + /// started and stopped manually. Execution time can be measured by nesting + /// timers within this root timer and starting/stopping them as appropriate. + /// Use this function only if you need access to the timer itself. Otherwise + /// consider the more convenient `getRootScope()` which offers an RAII-style + /// wrapper around the timer. + Timer getRootTimer(); + + /// Get the root timer of this timing manager wrapped in a `TimingScope` for + /// convenience. Automatically starts the timer and stops it as soon as the + /// `TimingScope` is destroyed, e.g. when it goes out of scope. + TimingScope getRootScope(); + +protected: + // Allow `Timer` access to the protected callbacks. + friend class Timer; + + //===--------------------------------------------------------------------===// + // Callbacks + // + // See the corresponding functions in `Timer` for additional details. + + /// Return the root timer. Implementations should return `llvm::None` if the + /// collection of timing samples is disabled. This will cause the timers + /// constructed from the manager to be tombstones which can be skipped + /// quickly. + virtual Optional rootTimer() = 0; + + /// Start the timer with the given handle. + virtual void startTimer(void *handle) = 0; + + /// Stop the timer with the given handle. + virtual void stopTimer(void *handle) = 0; + + /// Create a child timer nested within the one with the given handle. The `id` + /// parameter is used to uniquely identify the timer within its parent. + /// Multiple calls to this function with the same `handle` and `id` should + /// return the same timer, or at least cause the samples of the returned + /// timers to be combined for the final timing results. + virtual void *nestTimer(void *handle, const void *id, + function_ref nameBuilder) = 0; + + /// Hide the timer in timing reports and directly show its children. This is + /// merely a hint that implementations are free to ignore. + virtual void hideTimer(void *handle) {} + +protected: + const std::unique_ptr impl; + + // Allow `TimingIdentifier::get` access to the private impl details. + friend class TimingIdentifier; + +private: + // Disallow copying the manager. + TimingManager(const TimingManager &) = delete; + void operator=(const TimingManager &) = delete; +}; + +//===----------------------------------------------------------------------===// +// Timer +//===----------------------------------------------------------------------===// + +/// A handle for a timer in a `TimingManager`. +/// +/// This class encapsulates a pointer to a `TimingManager` and an opaque handle +/// to a timer running within that manager. Libraries and infrastructure code +/// operate on `Timer` rather than any concrete classes handed out by custom +/// manager implementations. +class Timer { +public: + Timer() {} + Timer(const Timer &other) : tm(other.tm), handle(other.handle) {} + Timer(Timer &&other) : Timer(other) { + other.tm = nullptr; + other.handle = nullptr; + } + + Timer &operator=(Timer &&other) { + tm = other.tm; + handle = other.handle; + other.tm = nullptr; + other.handle = nullptr; + return *this; + } + + /// Returns whether this is a valid timer handle. Invalid timer handles are + /// used when timing is disabled in the `TimingManager` to keep the impact on + /// performance low. + explicit operator bool() const { return tm != nullptr; } + + /// Start the timer. This must be accompanied by a corresponding call to + /// `stop()` at a later point. + void start() { + if (tm) + tm->startTimer(handle); + } + + /// Stop the timer. This must have been preceded by a corresponding call to + /// `start()` at an earlier point. + void stop() { + if (tm) + tm->stopTimer(handle); + } + + /// Create a child timer nested within this one. Multiple calls to this + /// function with the same unique identifier `id` will return the same child + /// timer. The timer must have been started when calling this function. + /// + /// This function can be called from other threads, as long as this timer + /// is not stopped before any uses of the child timer on the other thread are + /// stopped. + /// + /// The `nameBuilder` function is not guaranteed to be called. + Timer nest(const void *id, function_ref nameBuilder) { + return tm ? Timer(*tm, tm->nestTimer(handle, id, std::move(nameBuilder))) + : Timer(); + } + + /// See above. + Timer nest(TimingIdentifier name) { + return tm ? nest(name.getAsOpaquePointer(), [=]() { return name.str(); }) + : Timer(); + } + + /// See above. + Timer nest(StringRef name) { + return tm ? nest(TimingIdentifier::get(name, *tm)) : Timer(); + } + + /// Hide the timer in timing reports and directly show its children. + void hide() { + if (tm) + tm->hideTimer(handle); + } + +protected: + Timer(TimingManager &tm, void *handle) : tm(&tm), handle(handle) {} + + // Allow the `TimingManager` access to the above constructor. + friend class TimingManager; + +private: + /// The associated timing manager. + TimingManager *tm = nullptr; + /// An opaque handle that identifies the timer in the timing manager + /// implementation. + void *handle = nullptr; +}; + +//===----------------------------------------------------------------------===// +// TimingScope +//===----------------------------------------------------------------------===// + +/// An RAII-style wrapper around a timer that ensures the timer is properly +/// started and stopped. +class TimingScope { +public: + TimingScope() : timer() {} + TimingScope(const Timer &other) : timer(other) { + if (timer) + timer.start(); + } + TimingScope(Timer &&other) : timer(std::move(other)) { + if (timer) + timer.start(); + } + TimingScope(TimingScope &&other) : timer(std::move(other.timer)) {} + ~TimingScope() { stop(); } + + TimingScope &operator=(TimingScope &&other) { + stop(); + timer = std::move(other.timer); + return *this; + } + + /// Check if the timing scope actually contains a valid timer. + explicit operator bool() const { return bool(timer); } + + // Disable copying of the `TimingScope`. + TimingScope(const TimingScope &) = delete; + TimingScope &operator=(const TimingScope &) = delete; + + /// Manually stop the timer early. + void stop() { + timer.stop(); + timer = Timer(); + } + + /// Create a nested timing scope. + /// + /// This returns a new `TimingScope` with a timer nested within the current + /// scope. In this fashion, the time in this scope may be further subdivided + /// in a more fine-grained fashion. + template + TimingScope nest(Args... args) { + return TimingScope(std::move(timer.nest(std::forward(args)...))); + } + + /// Hide the timer in timing reports and directly show its children. + void hide() { timer.hide(); } + +private: + /// The wrapped timer. + Timer timer; +}; + +//===----------------------------------------------------------------------===// +// DefaultTimingManager +//===----------------------------------------------------------------------===// + +/// Facilities for time measurement and report printing to an output stream. +/// +/// This is MLIR's default implementation of a `TimingManager`. Prints an +/// execution time report upon destruction, or manually through `print()`. By +/// default the results are printed in `DisplayMode::Tree` mode to stderr. +/// Use `setEnabled(true)` to enable collection of timing samples; it is +/// disabled by default. +/// +/// You should only instantiate a `DefaultTimingManager` if you are writing a +/// tool and want to pass a timing manager to the remaining infrastructure. If +/// you are writing library or infrastructure code, you should rather accept +/// the `TimingManager` base class to allow for users of your code to substitute +/// their own timing implementations. Also, if you only intend to collect time +/// samples, consider accepting a `Timer` or `TimingScope` instead. +class DefaultTimingManager : public TimingManager { +public: + /// The different display modes for printing the timers. + enum class DisplayMode { + /// In this mode the results are displayed in a list sorted by total time, + /// with timers aggregated into one unique result per timer name. + List, + + /// In this mode the results are displayed in a tree view, with child timers + /// nested under their parents. + Tree, + }; + + DefaultTimingManager(); + DefaultTimingManager(DefaultTimingManager &&rhs); + virtual ~DefaultTimingManager(); + + // Disable copying of the `DefaultTimingManager`. + DefaultTimingManager(const DefaultTimingManager &rhs) = delete; + DefaultTimingManager &operator=(const DefaultTimingManager &rhs) = delete; + + /// Enable or disable execution time sampling. + void setEnabled(bool enabled); + + /// Return whether execution time sampling is enabled. + bool isEnabled() const; + + /// Change the display mode. + void setDisplayMode(DisplayMode displayMode); + + /// Return the current display mode; + DisplayMode getDisplayMode() const; + + /// Change the stream where the output will be printed to. + void setOutput(raw_ostream &os); + + /// Return the current output stream where the output will be printed to. + raw_ostream &getOutput() const; + + /// Print and clear the timing results. Only call this when there are no more + /// references to nested timers around, as printing post-processes and clears + /// the timers. + void print(); + + /// Clear the timing results. Only call this when there are no more references + /// to nested timers around, as clearing invalidates them. + void clear(); + + /// Debug print the timer data structures to an output stream. + void dumpTimers(raw_ostream &os = llvm::errs()); + + /// Debug print the timers as a list. Only call this when there are no more + /// references to nested timers around. + void dumpAsList(raw_ostream &os = llvm::errs()); + + /// Debug print the timers as a tree. Only call this when there are no + /// more references to nested timers around. + void dumpAsTree(raw_ostream &os = llvm::errs()); + +protected: + // `TimingManager` callbacks + Optional rootTimer() override; + void startTimer(void *handle) override; + void stopTimer(void *handle) override; + void *nestTimer(void *handle, const void *id, + function_ref nameBuilder) override; + void hideTimer(void *handle) override; + +private: + const std::unique_ptr impl; +}; + +/// Register a set of useful command-line options that can be used to configure +/// a `DefaultTimingManager`. The values of these options can be applied via the +/// `applyDefaultTimingManagerCLOptions` method. +void registerDefaultTimingManagerCLOptions(); + +/// Apply any values that were registered with +/// 'registerDefaultTimingManagerOptions' to a `DefaultTimingManager`. +void applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm); + +} // namespace mlir + +#endif // MLIR_SUPPORT_TIMING_H diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -247,8 +247,11 @@ OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->nesting)); impl->initializationGeneration = rhs.impl->initializationGeneration; - for (auto &pass : rhs.impl->passes) - impl->passes.emplace_back(pass->clone()); + for (auto &pass : rhs.impl->passes) { + auto newPass = pass->clone(); + newPass->threadingSibling = pass.get(); + impl->passes.push_back(std::move(newPass)); + } return *this; } diff --git a/mlir/lib/Pass/PassManagerOptions.cpp b/mlir/lib/Pass/PassManagerOptions.cpp --- a/mlir/lib/Pass/PassManagerOptions.cpp +++ b/mlir/lib/Pass/PassManagerOptions.cpp @@ -9,6 +9,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" +#include "mlir/Support/Timing.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" @@ -56,22 +57,6 @@ /// Add an IR printing instrumentation if enabled by any 'print-ir' flags. void addPrinterInstrumentation(PassManager &pm); - //===--------------------------------------------------------------------===// - // Pass Timing - //===--------------------------------------------------------------------===// - llvm::cl::opt passTiming{ - "pass-timing", - llvm::cl::desc("Display the execution times of each pass")}; - llvm::cl::opt passTimingDisplayMode{ - "pass-timing-display", - llvm::cl::desc("Display method for pass timing data"), - llvm::cl::init(PassDisplayMode::Pipeline), - llvm::cl::values( - clEnumValN(PassDisplayMode::List, "list", - "display the results in a list sorted by total time"), - clEnumValN(PassDisplayMode::Pipeline, "pipeline", - "display the results with a nested pipeline view"))}; - //===--------------------------------------------------------------------===// // Pass Statistics //===--------------------------------------------------------------------===// @@ -87,9 +72,6 @@ "display the results in a merged list sorted by pass name"), clEnumValN(PassDisplayMode::Pipeline, "pipeline", "display the results with a nested pipeline view"))}; - - /// Add a pass timing instrumentation if enabled by 'pass-timing' flags. - void addTimingInstrumentation(PassManager &pm); }; } // end anonymous namespace @@ -135,13 +117,6 @@ printModuleScope, printAfterChange, llvm::errs()); } -/// Add a pass timing instrumentation if enabled by 'pass-timing' flags. -void PassManagerOptions::addTimingInstrumentation(PassManager &pm) { - if (passTiming) - pm.enableTiming( - std::make_unique(passTimingDisplayMode)); -} - void mlir::registerPassManagerCLOptions() { // Make sure that the options struct has been constructed. *options; @@ -162,9 +137,12 @@ // Add the IR printing instrumentation. options->addPrinterInstrumentation(pm); +} - // Note: The pass timing instrumentation should be added last to avoid any - // potential "ghost" timing from other instrumentations being unintentionally - // included in the timing results. - options->addTimingInstrumentation(pm); +void mlir::applyDefaultTimingPassManagerCLOptions(PassManager &pm) { + // Create a temporary timing manager for the PM to own, apply its CL options, + // and pass it to the PM. + auto tm = std::make_unique(); + applyDefaultTimingManagerCLOptions(*tm); + pm.enableTiming(std::move(tm)); } diff --git a/mlir/lib/Pass/PassTiming.cpp b/mlir/lib/Pass/PassTiming.cpp --- a/mlir/lib/Pass/PassTiming.cpp +++ b/mlir/lib/Pass/PassTiming.cpp @@ -8,466 +8,158 @@ #include "PassDetail.h" #include "mlir/Pass/PassManager.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" + #include using namespace mlir; using namespace mlir::detail; -constexpr StringLiteral kPassTimingDescription = - "... Pass execution timing report ..."; +//===----------------------------------------------------------------------===// +// PassTiming +//===----------------------------------------------------------------------===// namespace { -/// Simple record class to record timing information. -struct TimeRecord { - TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} - - TimeRecord &operator+=(const TimeRecord &other) { - wall += other.wall; - user += other.user; - return *this; - } - - /// Print the current time record to 'os', with a breakdown showing - /// contributions to the give 'total' time record. - void print(raw_ostream &os, const TimeRecord &total) { - if (total.user != total.wall) - os << llvm::format(" %7.4f (%5.1f%%) ", user, - 100.0 * user / total.user); - os << llvm::format(" %7.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); - } - - double wall, user; -}; - -/// An enumeration of the different types of timers. -enum class TimerKind { - /// This timer represents an ordered collection of pass timers, corresponding - /// to a pass pipeline. - Pipeline, - - /// This timer represents a collection of pipeline timers. - PipelineCollection, - - /// This timer represents an analysis or pass timer. - PassOrAnalysis -}; - -struct Timer { - explicit Timer(std::string &&name, TimerKind kind) - : name(std::move(name)), kind(kind) {} - - /// Start the timer. - void start() { startTime = std::chrono::system_clock::now(); } - - /// Stop the timer. - void stop() { - auto newTime = std::chrono::system_clock::now() - startTime; - wallTime += newTime; - userTime += newTime; - } +struct PassTiming : public PassInstrumentation { + PassTiming(TimingScope &timingScope) : rootScope(timingScope) {} + PassTiming(std::unique_ptr tm) + : ownedTimingManager(std::move(tm)), + ownedTimingScope(ownedTimingManager->getRootScope()), + rootScope(ownedTimingScope) {} + + /// If a pass can spawn additional work on other threads, it records the + /// index to its currently active timer here. Passes that run on a + /// newly-forked thread will check this list to find the active timer of the + /// parent thread into which the new thread should be nested. + DenseMap parentTimerIndices; + + /// A stack of the currently active timing scopes per thread. + DenseMap> activeThreadTimers; + + /// The timing manager owned by this instrumentation (in case timing was + /// enabled by the user on the pass manager without providing an external + /// timing manager). This *must* appear before the `ownedTimingScope` to + /// ensure the timing manager is destroyed *after* the scope, since the latter + /// may hold a timer that points into the former. + std::unique_ptr ownedTimingManager; + TimingScope ownedTimingScope; + + /// The root timing scope into which timing is reported. + TimingScope &rootScope; + + //===--------------------------------------------------------------------===// + // Pipeline + //===--------------------------------------------------------------------===// - /// Get or create a child timer with the provided name and id. - Timer *getChildTimer(const void *id, TimerKind kind, - std::function &&nameBuilder) { - auto &child = children[id]; - if (!child) - child = std::make_unique(nameBuilder(), kind); - return child.get(); - } + void runBeforePipeline(Identifier name, + const PipelineParentInfo &parentInfo) override { + auto tid = llvm::get_threadid(); + auto &activeTimers = activeThreadTimers[tid]; - /// Returns the total time for this timer in seconds. - TimeRecord getTotalTime() { - // If this is a pass or analysis timer, use the recorded time directly. - if (kind == TimerKind::PassOrAnalysis) { - return TimeRecord( - std::chrono::duration_cast>(wallTime) - .count(), - std::chrono::duration_cast>(userTime) - .count()); + TimingScope *parentScope; + if (activeTimers.empty()) { + auto it = parentTimerIndices.find(parentInfo); + if (it != parentTimerIndices.end()) + parentScope = + &activeThreadTimers[parentInfo.parentThreadID][it->second]; + else + parentScope = &rootScope; + } else { + parentScope = &activeTimers.back(); } - - // Otherwise, accumulate the timing from each of the children. - TimeRecord totalTime; - for (auto &child : children) - totalTime += child.second->getTotalTime(); - return totalTime; + activeTimers.push_back(parentScope->nest(name.getAsOpaquePointer(), [name] { + return ("'" + name.strref() + "' Pipeline").str(); + })); } - /// A map of unique identifiers to child timers. - using ChildrenMap = llvm::MapVector>; - - /// Merge the timing data from 'other' into this timer. - void merge(Timer &&other) { - if (wallTime < other.wallTime) - wallTime = other.wallTime; - userTime += other.userTime; - mergeChildren(std::move(other.children)); + void runAfterPipeline(Identifier, const PipelineParentInfo &) override { + auto &activeTimers = activeThreadTimers[llvm::get_threadid()]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - /// Merge the timer children in 'otherChildren' with the children of this - /// timer. - void mergeChildren(ChildrenMap &&otherChildren) { - // Check for an empty children list. - if (children.empty()) { - children = std::move(otherChildren); - return; - } + //===--------------------------------------------------------------------===// + // Pass + //===--------------------------------------------------------------------===// - // Pipeline merges are handled separately as the children are merged - // lexicographically. - if (kind == TimerKind::Pipeline) { - assert(children.size() == otherChildren.size() && - "pipeline merge requires the same number of children"); - for (auto it : llvm::zip(children, otherChildren)) - std::get<0>(it).second->merge(std::move(*std::get<1>(it).second)); - return; + void runBeforePass(Pass *pass, Operation *) override { + auto tid = llvm::get_threadid(); + auto &activeTimers = activeThreadTimers[tid]; + auto &parentScope = activeTimers.empty() ? rootScope : activeTimers.back(); + + if (auto *adaptor = dyn_cast(pass)) { + parentTimerIndices[{tid, pass}] = activeTimers.size(); + auto scope = + parentScope.nest(pass->getThreadingSiblingOrThis(), + [adaptor]() { return adaptor->getAdaptorName(); }); + if (adaptor->getPassManagers().size() <= 1) + scope.hide(); + activeTimers.push_back(std::move(scope)); + } else { + activeTimers.push_back( + parentScope.nest(pass->getThreadingSiblingOrThis(), + [pass]() { return std::string(pass->getName()); })); } - - // Otherwise, we merge children based upon their timer key. - for (auto &otherChild : otherChildren) - mergeChild(std::move(otherChild)); } - /// Merge in the given child timer and id into this timer. - void mergeChild(ChildrenMap::value_type &&childIt) { - auto &child = children[childIt.first]; - if (!child) - child = std::move(childIt.second); - else - child->merge(std::move(*childIt.second)); + void runAfterPass(Pass *pass, Operation *) override { + auto tid = llvm::get_threadid(); + if (isa(pass)) + parentTimerIndices.erase({tid, pass}); + auto &activeTimers = activeThreadTimers[tid]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - /// Raw timing information. - std::chrono::time_point startTime; - std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); - std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); - - /// A map of unique identifiers to child timers. - ChildrenMap children; - - /// A descriptive name for this timer. - std::string name; - - /// The type of timer this instance represents. - TimerKind kind; -}; - -struct PassTiming : public PassInstrumentation { - PassTiming(std::unique_ptr config) - : config(std::move(config)) {} - ~PassTiming() override { print(); } - - /// Setup the instrumentation hooks. - void runBeforePipeline(Identifier name, - const PipelineParentInfo &parentInfo) override; - void runAfterPipeline(Identifier name, - const PipelineParentInfo &parentInfo) override; - void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } - void runAfterPass(Pass *pass, Operation *) override; void runAfterPassFailed(Pass *pass, Operation *op) override { runAfterPass(pass, op); } - void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override { - startAnalysisTimer(name, id); - } - void runAfterAnalysis(StringRef, TypeID, Operation *) override; - /// Print and clear the timing results. - void print(); + //===--------------------------------------------------------------------===// + // Analysis + //===--------------------------------------------------------------------===// - /// Start a new timer for the given pass. - void startPassTimer(Pass *pass); - - /// Start a new timer for the given analysis. - void startAnalysisTimer(StringRef name, TypeID id); - - /// Pop the last active timer for the current thread. - Timer *popLastActiveTimer() { + void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override { auto tid = llvm::get_threadid(); auto &activeTimers = activeThreadTimers[tid]; - assert(!activeTimers.empty() && "expected active timer"); - return activeTimers.pop_back_val(); + auto &parentScope = activeTimers.empty() ? rootScope : activeTimers.back(); + activeTimers.push_back(parentScope.nest( + id.getAsOpaquePointer(), [name] { return "(A) " + name.str(); })); } - /// Print the timing result in list mode. - void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime); - - /// Print the timing result in pipeline mode. - void printResultsAsPipeline(raw_ostream &os, Timer *root, - TimeRecord totalTime); - - /// Returns a timer for the provided identifier and name. - Timer *getTimer(const void *id, TimerKind kind, - std::function &&nameBuilder) { - auto tid = llvm::get_threadid(); - - // If there is no active timer then add to the root timer. - auto &activeTimers = activeThreadTimers[tid]; - Timer *parentTimer; - if (activeTimers.empty()) { - auto &rootTimer = rootTimers[tid]; - if (!rootTimer) - rootTimer = std::make_unique("root", TimerKind::Pipeline); - parentTimer = rootTimer.get(); - } else { - // Otherwise, add this to the active timer. - parentTimer = activeTimers.back(); - } - - auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder)); - activeTimers.push_back(timer); - return timer; + void runAfterAnalysis(StringRef, TypeID, Operation *) override { + auto &activeTimers = activeThreadTimers[llvm::get_threadid()]; + assert(!activeTimers.empty() && "expected active timer"); + activeTimers.pop_back(); } - - /// The root top level timers for each thread. - DenseMap> rootTimers; - - /// A stack of the currently active pass timers per thread. - DenseMap> activeThreadTimers; - - /// The configuration object to use when printing the timing results. - std::unique_ptr config; - - /// A mapping of pipeline timers that need to be merged into the parent - /// collection. The timers are mapped to the parent info to merge into. - DenseMap> - pipelinesToMerge; }; -} // end anonymous namespace +} // namespace -void PassTiming::runBeforePipeline(Identifier name, - const PipelineParentInfo &parentInfo) { - // We don't actually want to time the pipelines, they gather their total - // from their held passes. - getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, - [&] { return ("'" + name.strref() + "' Pipeline").str(); }); -} - -void PassTiming::runAfterPipeline(Identifier name, - const PipelineParentInfo &parentInfo) { - // Pop the timer for the pipeline. - auto tid = llvm::get_threadid(); - auto &activeTimers = activeThreadTimers[tid]; - assert(!activeTimers.empty() && "expected active timer"); - activeTimers.pop_back(); - - // If the current thread is the same as the parent, there is nothing left to - // do. - if (tid == parentInfo.parentThreadID) - return; - - // Otherwise, mark the pipeline timer for merging into the correct parent - // thread. - assert(activeTimers.empty() && "expected parent timer to be root"); - auto *parentTimer = rootTimers[tid].get(); - assert(parentTimer->children.size() == 1 && - parentTimer->children.count(name.getAsOpaquePointer()) && - "expected a single pipeline timer"); - pipelinesToMerge[parentInfo].push_back( - std::move(*parentTimer->children.begin())); - rootTimers.erase(tid); -} - -/// Start a new timer for the given pass. -void PassTiming::startPassTimer(Pass *pass) { - auto kind = isa(pass) ? TimerKind::PipelineCollection - : TimerKind::PassOrAnalysis; - Timer *timer = getTimer(pass, kind, [pass]() -> std::string { - if (auto *adaptor = dyn_cast(pass)) - return adaptor->getAdaptorName(); - return std::string(pass->getName()); - }); - - // We don't actually want to time the adaptor passes, they gather their total - // from their held passes. - if (!isa(pass)) - timer->start(); -} - -/// Start a new timer for the given analysis. -void PassTiming::startAnalysisTimer(StringRef name, TypeID id) { - Timer *timer = getTimer(id.getAsOpaquePointer(), TimerKind::PassOrAnalysis, - [name] { return "(A) " + name.str(); }); - timer->start(); -} - -/// Stop a pass timer. -void PassTiming::runAfterPass(Pass *pass, Operation *) { - Timer *timer = popLastActiveTimer(); - - // Check to see if we need to merge in the timing data for the pipelines - // running on other threads. - auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass}); - if (toMerge != pipelinesToMerge.end()) { - for (auto &it : toMerge->second) - timer->mergeChild(std::move(it)); - pipelinesToMerge.erase(toMerge); - } - - timer->stop(); -} - -/// Stop a timer. -void PassTiming::runAfterAnalysis(StringRef, TypeID, Operation *) { - popLastActiveTimer()->stop(); -} - -/// Utility to print the timer heading information. -static void printTimerHeader(raw_ostream &os, TimeRecord total) { - os << "===" << std::string(73, '-') << "===\n"; - // Figure out how many spaces to description name. - unsigned padding = (80 - kPassTimingDescription.size()) / 2; - os.indent(padding) << kPassTimingDescription << '\n'; - os << "===" << std::string(73, '-') << "===\n"; - - // Print the total time followed by the section headers. - os << llvm::format(" Total Execution Time: %5.4f seconds\n\n", total.wall); - if (total.user != total.wall) - os << " ---User Time---"; - os << " ---Wall Time--- --- Name ---\n"; -} - -/// Utility to print a single line entry in the timer output. -static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, - TimeRecord time, TimeRecord totalTime) { - time.print(os, totalTime); - os.indent(indent) << name << "\n"; -} +//===----------------------------------------------------------------------===// +// PassManager +//===----------------------------------------------------------------------===// -/// Print out the current timing information. -void PassTiming::print() { - // Don't print anything if there is no timing data. - if (rootTimers.empty()) +/// Add an instrumentation to time the execution of passes and the computation +/// of analyses. +void PassManager::enableTiming(TimingScope &timingScope) { + if (!timingScope) return; - - assert(rootTimers.size() == 1 && "expected one remaining root timer"); - - auto printCallback = [&](raw_ostream &os) { - auto &rootTimer = rootTimers.begin()->second; - // Print the timer header. - TimeRecord totalTime = rootTimer->getTotalTime(); - printTimerHeader(os, totalTime); - // Defer to a specialized printer for each display mode. - switch (config->getDisplayMode()) { - case PassDisplayMode::List: - printResultsAsList(os, rootTimer.get(), totalTime); - break; - case PassDisplayMode::Pipeline: - printResultsAsPipeline(os, rootTimer.get(), totalTime); - break; - } - printTimeEntry(os, 0, "Total", totalTime, totalTime); - os.flush(); - - // Reset root timers. - rootTimers.clear(); - activeThreadTimers.clear(); - }; - - config->printTiming(printCallback); + addInstrumentation(std::make_unique(timingScope)); } -// The default implementation for printTiming uses -// `llvm::CreateInfoOutputFile()` as stream, it can be overridden by clients -// to customize the output. -void PassManager::PassTimingConfig::printTiming(PrintCallbackFn printCallback) { - printCallback(*llvm::CreateInfoOutputFile()); -} - -/// Print the timing result in list mode. -void PassTiming::printResultsAsList(raw_ostream &os, Timer *root, - TimeRecord totalTime) { - llvm::StringMap mergedTimings; - - std::function addTimer = [&](Timer *timer) { - // Only add timing information for passes and analyses. - if (timer->kind == TimerKind::PassOrAnalysis) - mergedTimings[timer->name] += timer->getTotalTime(); - for (auto &children : timer->children) - addTimer(children.second.get()); - }; - - // Add each of the top level timers. - for (auto &topLevelTimer : root->children) - addTimer(topLevelTimer.second.get()); - - // Sort the timing information by wall time. - std::vector> timerNameAndTime; - for (auto &it : mergedTimings) - timerNameAndTime.emplace_back(it.first(), it.second); - llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), - [](const std::pair *lhs, - const std::pair *rhs) { - return llvm::array_pod_sort_comparator( - &rhs->second.wall, &lhs->second.wall); - }); - - // Print the timing information sequentially. - for (auto &timeData : timerNameAndTime) - printTimeEntry(os, 0, timeData.first, timeData.second, totalTime); -} - -/// Print the timing result in pipeline mode. -void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root, - TimeRecord totalTime) { - std::function printTimer = [&](unsigned indent, - Timer *timer) { - // If this is a timer for a pipeline collection and the collection only has - // one pipeline child, then only print the child. - if (timer->kind == TimerKind::PipelineCollection && - timer->children.size() == 1) - return printTimer(indent, timer->children.begin()->second.get()); - - printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime); - - // If this timer is a pipeline, then print the children in-order. - if (timer->kind == TimerKind::Pipeline) { - for (auto &child : timer->children) - printTimer(indent + 2, child.second.get()); - return; - } - - // Otherwise, sort the children by name to give a deterministic ordering - // when emitting the time. - SmallVector children; - children.reserve(timer->children.size()); - for (auto &child : timer->children) - children.push_back(child.second.get()); - llvm::array_pod_sort(children.begin(), children.end(), - [](Timer *const *lhs, Timer *const *rhs) { - return (*lhs)->name.compare((*rhs)->name); - }); - for (auto &child : children) - printTimer(indent + 2, child); - }; - - // Print each of the top level timers. - for (auto &topLevelTimer : root->children) - printTimer(0, topLevelTimer.second.get()); +/// Add an instrumentation to time the execution of passes and the computation +/// of analyses. +void PassManager::enableTiming(std::unique_ptr tm) { + if (!tm->getRootTimer()) + return; // no need to keep the timing manager around if it's disabled + addInstrumentation(std::make_unique(std::move(tm))); } -// Out-of-line as key function. -PassManager::PassTimingConfig::~PassTimingConfig() {} - -//===----------------------------------------------------------------------===// -// PassManager -//===----------------------------------------------------------------------===// - /// Add an instrumentation to time the execution of passes and the computation /// of analyses. -void PassManager::enableTiming(std::unique_ptr config) { - // Check if pass timing is already enabled. - if (passTiming) - return; - if (!config) - config = std::make_unique(); - addInstrumentation(std::make_unique(std::move(config))); - passTiming = true; +void PassManager::enableTiming() { + auto tm = std::make_unique(); + tm->setEnabled(true); + enableTiming(std::move(tm)); } diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -4,6 +4,7 @@ IndentedOstream.cpp MlirOptMain.cpp StorageUniquer.cpp + Timing.cpp ToolUtilities.cpp ) @@ -11,6 +12,7 @@ DebugCounter.cpp FileUtilities.cpp StorageUniquer.cpp + Timing.cpp ToolUtilities.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp --- a/mlir/lib/Support/MlirOptMain.cpp +++ b/mlir/lib/Support/MlirOptMain.cpp @@ -24,6 +24,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Support/DebugCounter.h" #include "mlir/Support/FileUtilities.h" +#include "mlir/Support/Timing.h" #include "mlir/Support/ToolUtilities.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileUtilities.h" @@ -47,21 +48,28 @@ bool verifyPasses, SourceMgr &sourceMgr, MLIRContext *context, const PassPipelineCLParser &passPipeline) { + DefaultTimingManager tm; + applyDefaultTimingManagerCLOptions(tm); + TimingScope timing = tm.getRootScope(); + // Disable multi-threading when parsing the input file. This removes the // unnecessary/costly context synchronization when parsing. bool wasThreadingEnabled = context->isMultithreadingEnabled(); context->disableMultithreading(); // Parse the input file and reset the context threading state. + TimingScope parserTiming = timing.nest("Parser"); OwningModuleRef module(parseSourceFile(sourceMgr, context)); context->enableMultithreading(wasThreadingEnabled); if (!module) return failure(); + parserTiming.stop(); // Apply any pass manager command line options. PassManager pm(context, OpPassManager::Nesting::Implicit); pm.enableVerifier(verifyPasses); applyPassManagerCLOptions(pm); + pm.enableTiming(timing); auto errorHandler = [&](const Twine &msg) { emitError(UnknownLoc::get(context)) << msg; @@ -77,6 +85,7 @@ return failure(); // Print the output. + TimingScope outputTiming = timing.nest("Output"); module->print(os); os << '\n'; return success(); @@ -195,6 +204,7 @@ registerAsmPrinterCLOptions(); registerMLIRContextCLOptions(); registerPassManagerCLOptions(); + registerDefaultTimingManagerCLOptions(); DebugCounter::registerCLOptions(); PassPipelineCLParser passPipeline("", "Compiler passes to run"); diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Support/Timing.cpp @@ -0,0 +1,566 @@ +//===- Timing.cpp - Execution time measurement facilities -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Facilities to measure and provide statistics on execution time. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Support/Timing.h" +#include "mlir/Support/ThreadLocalCache.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/RWMutex.h" +#include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace mlir; +using namespace detail; +using DisplayMode = DefaultTimingManager::DisplayMode; + +constexpr llvm::StringLiteral kTimingDescription = + "... Execution time report ..."; + +//===----------------------------------------------------------------------===// +// TimingManager +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace detail { +/// Private implementation details of the `TimingManager`. +class TimingManagerImpl { +public: + // Identifier allocator, map, and mutex for thread safety. + llvm::BumpPtrAllocator identifierAllocator; + llvm::StringSet identifiers; + llvm::sys::SmartRWMutex identifierMutex; + + /// A thread local cache of identifiers to reduce lock contention. + ThreadLocalCache *>> + localIdentifierCache; + + TimingManagerImpl() : identifiers(identifierAllocator) {} +}; +} // namespace detail +} // namespace mlir + +TimingManager::TimingManager() : impl(std::make_unique()) {} + +TimingManager::~TimingManager() {} + +/// Get the root timer of this timing manager. +Timer TimingManager::getRootTimer() { + auto rt = rootTimer(); + return rt.hasValue() ? Timer(*this, rt.getValue()) : Timer(); +} + +/// Get the root timer of this timing manager wrapped in a `TimingScope`. +TimingScope TimingManager::getRootScope() { + return TimingScope(getRootTimer()); +} + +//===----------------------------------------------------------------------===// +// Identifier uniquing +//===----------------------------------------------------------------------===// + +/// Return an identifier for the specified string. +TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) { + // Check for an existing instance in the local cache. + auto &impl = *tm.impl; + auto *&localEntry = (*impl.localIdentifierCache)[str]; + if (localEntry) + return TimingIdentifier(localEntry); + + // Check for an existing identifier in read-only mode. + { + llvm::sys::SmartScopedReader contextLock(impl.identifierMutex); + auto it = impl.identifiers.find(str); + if (it != impl.identifiers.end()) { + localEntry = &*it; + return TimingIdentifier(localEntry); + } + } + + // Acquire a writer-lock so that we can safely create the new instance. + llvm::sys::SmartScopedWriter contextLock(impl.identifierMutex); + auto it = impl.identifiers.insert(str).first; + localEntry = &*it; + return TimingIdentifier(localEntry); +} + +//===----------------------------------------------------------------------===// +// Helpers for time record printing +//===----------------------------------------------------------------------===// + +namespace { + +/// Simple record class to record timing information. +struct TimeRecord { + TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} + + TimeRecord &operator+=(const TimeRecord &other) { + wall += other.wall; + user += other.user; + return *this; + } + + TimeRecord &operator-=(const TimeRecord &other) { + wall -= other.wall; + user -= other.user; + return *this; + } + + /// Print the current time record to 'os', with a breakdown showing + /// contributions to the give 'total' time record. + void print(raw_ostream &os, const TimeRecord &total) { + if (total.user != total.wall) + os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user); + os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); + } + + double wall, user; +}; + +} // namespace + +/// Utility to print a single line entry in the timer output. +static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, + TimeRecord time, TimeRecord total) { + time.print(os, total); + os.indent(indent) << name << "\n"; +} + +/// Utility to print the timer heading information. +static void printTimeHeader(raw_ostream &os, TimeRecord total) { + // Figure out how many spaces to description name. + unsigned padding = (80 - kTimingDescription.size()) / 2; + os << "===" << std::string(73, '-') << "===\n"; + os.indent(padding) << kTimingDescription << '\n'; + os << "===" << std::string(73, '-') << "===\n"; + + // Print the total time followed by the section headers. + os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall); + if (total.user != total.wall) + os << " ----User Time----"; + os << " ----Wall Time---- ----Name----\n"; +} + +//===----------------------------------------------------------------------===// +// Timer Implementation for DefaultTimingManager +//===----------------------------------------------------------------------===// + +namespace { + +/// A timer used to sample execution time. +/// +/// Separately tracks wall time and user time to account for parallel threads of +/// execution. Timers are intended to be started and stopped multiple times. +/// Each start and stop will add to the timer's wall and user time. +class TimerImpl { +public: + using ChildrenMap = llvm::MapVector>; + using AsyncChildrenMap = llvm::DenseMap; + + TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {} + + /// Start the timer. + void start() { startTime = std::chrono::system_clock::now(); } + + /// Stop the timer. + void stop() { + auto newTime = std::chrono::system_clock::now() - startTime; + wallTime += newTime; + userTime += newTime; + } + + /// Create a child timer nested within this one. Multiple calls to this + /// function with the same unique identifier `id` will return the same child + /// timer. + /// + /// This function can be called from other threads, as long as this timer + /// outlives any uses of the child timer on the other thread. + TimerImpl *nest(const void *id, function_ref nameBuilder) { + auto tid = llvm::get_threadid(); + if (tid == threadId) + return nestTail(children[id], std::move(nameBuilder)); + std::unique_lock lock(asyncMutex); + return nestTail(asyncChildren[tid][id], std::move(nameBuilder)); + } + + /// Tail-called from `nest()`. + TimerImpl *nestTail(std::unique_ptr &child, + function_ref nameBuilder) { + if (!child) + child = std::make_unique(nameBuilder()); + return child.get(); + } + + /// Finalize this timer and all its children. + /// + /// If this timer has async children, which happens if `nest()` was called + /// from another thread, this function merges the async childr timers into the + /// main list of child timers. + /// + /// Caution: Call this function only after all nested timers running on other + /// threads no longer need their timers! + void finalize() { + addAsyncUserTime(); + mergeAsyncChildren(); + } + + /// Add the user time of all async children to this timer's user time. This is + /// necessary since the user time already contains all regular child timers, + /// but not the asynchronous ones (by the nesting nature of the timers). + std::chrono::nanoseconds addAsyncUserTime() { + auto added = std::chrono::nanoseconds(0); + for (auto &child : children) + added += child.second->addAsyncUserTime(); + for (auto &thread : asyncChildren) { + for (auto &child : thread.second) { + child.second->addAsyncUserTime(); + added += child.second->userTime; + } + } + userTime += added; + return added; + } + + /// Ensure that this timer and recursively all its children have their async + /// children folded into the main map of children. + void mergeAsyncChildren() { + for (auto &child : children) + child.second->mergeAsyncChildren(); + mergeChildren(std::move(asyncChildren)); + assert(asyncChildren.empty()); + } + + /// Merge multiple child timers into this timer. + /// + /// Children in `other` are added as children to this timer, or, if this timer + /// already contains a child with the corresponding unique identifier, are + /// merged into the existing child. + void mergeChildren(ChildrenMap &&other) { + if (children.empty()) { + children = std::move(other); + for (auto &child : other) + child.second->mergeAsyncChildren(); + } else { + for (auto &child : other) + mergeChild(child.first, std::move(child.second)); + other.clear(); + } + } + + /// See above. + void mergeChildren(AsyncChildrenMap &&other) { + for (auto &thread : other) { + mergeChildren(std::move(thread.second)); + assert(thread.second.empty()); + } + other.clear(); + } + + /// Merge a child timer into this timer for a given unique identifier. + /// + /// Moves all child and async child timers of `other` into this timer's child + /// for the given unique identifier. + void mergeChild(const void *id, std::unique_ptr &&other) { + auto &into = children[id]; + if (!into) { + into = std::move(other); + into->mergeAsyncChildren(); + } else { + into->wallTime = std::max(into->wallTime, other->wallTime); + into->userTime += other->userTime; + into->mergeChildren(std::move(other->children)); + into->mergeChildren(std::move(other->asyncChildren)); + other.reset(); + } + } + + /// Dump a human-readable tree representation of the timer and its children. + /// This is useful for debugging the timing mechanisms and structure of the + /// timers. + void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) { + auto time = getTimeRecord(); + os << std::string(indent * 2, ' ') << name << " [" << threadId << "]" + << llvm::format(" %7.4f / %7.4f", time.user, time.wall); + if (threadId != markThreadId && markThreadId != 0) + os << " (*)"; + os << "\n"; + for (auto &child : children) + child.second->dump(os, indent + 1, threadId); + for (auto &thread : asyncChildren) + for (auto &child : thread.second) + child.second->dump(os, indent + 1, threadId); + } + + /// Returns the time for this timer in seconds. + TimeRecord getTimeRecord() { + return TimeRecord( + std::chrono::duration_cast>(wallTime) + .count(), + std::chrono::duration_cast>(userTime) + .count()); + } + + /// Print the timing result in list mode. + void printAsList(raw_ostream &os, TimeRecord total) { + // Flatten the leaf timers in the tree and merge them by name. + llvm::StringMap mergedTimers; + std::function addTimer = [&](TimerImpl *timer) { + mergedTimers[timer->name] += timer->getTimeRecord(); + for (auto &children : timer->children) + addTimer(children.second.get()); + }; + addTimer(this); + + // Sort the timing information by wall time. + std::vector> timerNameAndTime; + for (auto &it : mergedTimers) + timerNameAndTime.emplace_back(it.first(), it.second); + llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), + [](const std::pair *lhs, + const std::pair *rhs) { + return llvm::array_pod_sort_comparator( + &rhs->second.wall, &lhs->second.wall); + }); + + // Print the timing information sequentially. + for (auto &timeData : timerNameAndTime) + printTimeEntry(os, 0, timeData.first, timeData.second, total); + } + + /// Print the timing result in tree mode. + void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) { + unsigned childIndent = indent; + if (!hidden) { + printTimeEntry(os, indent, name, getTimeRecord(), total); + childIndent += 2; + } + for (auto &child : children) { + child.second->printAsTree(os, total, childIndent); + } + } + + /// Print the current timing information. + void print(raw_ostream &os, DisplayMode displayMode) { + // Print the banner. + auto total = getTimeRecord(); + printTimeHeader(os, total); + + // Defer to a specialized printer for each display mode. + switch (displayMode) { + case DisplayMode::List: + printAsList(os, total); + break; + case DisplayMode::Tree: + printAsTree(os, total); + break; + } + + // Print the top-level time not accounted for by child timers, and the + // total. + auto rest = total; + for (auto &child : children) + rest -= child.second->getTimeRecord(); + printTimeEntry(os, 0, "Rest", rest, total); + printTimeEntry(os, 0, "Total", total, total); + os.flush(); + } + + /// The last time instant at which the timer was started. + std::chrono::time_point startTime; + + /// Accumulated wall time. If multiple threads of execution are merged into + /// this timer, the wall time will hold the maximum wall time of each thread + /// of execution. + std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); + + /// Accumulated user time. If multiple threads of execution are merged into + /// this timer, each thread's user time is added here. + std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); + + /// The thread on which this timer is running. + uint64_t threadId; + + /// A descriptive name for this timer. + std::string name; + + /// Whether to omit this timer from reports and directly show its children. + bool hidden = false; + + /// Child timers on the same thread the timer itself. We keep at most one + /// timer per unique identifier. + ChildrenMap children; + + /// Child timers on other threads. We keep at most one timer per unique + /// identifier. + AsyncChildrenMap asyncChildren; + + /// Mutex for the async children. + std::mutex asyncMutex; +}; + +} // namespace + +//===----------------------------------------------------------------------===// +// DefaultTimingManager +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace detail { + +/// Implementation details of the `DefaultTimingManager`. +class DefaultTimingManagerImpl { +public: + /// Whether we should do our work or not. + bool enabled = false; + + /// The configured display mode. + DisplayMode displayMode = DisplayMode::Tree; + + /// The stream where we should print our output. This will always be non-null. + raw_ostream *output = &llvm::errs(); + + /// The root timer. + std::unique_ptr rootTimer; +}; + +} // namespace detail +} // namespace mlir + +DefaultTimingManager::DefaultTimingManager() + : impl(std::make_unique()) { + clear(); // initializes the root timer +} + +DefaultTimingManager::~DefaultTimingManager() { print(); } + +/// Enable or disable execution time sampling. +void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; } + +/// Return whether execution time sampling is enabled. +bool DefaultTimingManager::isEnabled() const { return impl->enabled; } + +/// Change the display mode. +void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) { + impl->displayMode = displayMode; +} + +/// Return the current display mode; +DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const { + return impl->displayMode; +} + +/// Change the stream where the output will be printed to. +void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; } + +/// Return the current output stream where the output will be printed to. +raw_ostream &DefaultTimingManager::getOutput() const { + assert(impl->output); + return *impl->output; +} + +/// Print and clear the timing results. +void DefaultTimingManager::print() { + if (impl->enabled) { + impl->rootTimer->finalize(); + impl->rootTimer->print(*impl->output, impl->displayMode); + } + clear(); +} + +/// Clear the timing results. +void DefaultTimingManager::clear() { + impl->rootTimer = std::make_unique("root"); + impl->rootTimer->hidden = true; +} + +/// Debug print the timer data structures to an output stream. +void DefaultTimingManager::dumpTimers(raw_ostream &os) { + impl->rootTimer->dump(os); +} + +/// Debug print the timers as a list. +void DefaultTimingManager::dumpAsList(raw_ostream &os) { + impl->rootTimer->finalize(); + impl->rootTimer->print(os, DisplayMode::List); +} + +/// Debug print the timers as a tree. +void DefaultTimingManager::dumpAsTree(raw_ostream &os) { + impl->rootTimer->finalize(); + impl->rootTimer->print(os, DisplayMode::Tree); +} + +Optional DefaultTimingManager::rootTimer() { + if (impl->enabled) + return impl->rootTimer.get(); + return llvm::None; +} + +void DefaultTimingManager::startTimer(void *handle) { + static_cast(handle)->start(); +} + +void DefaultTimingManager::stopTimer(void *handle) { + static_cast(handle)->stop(); +} + +void *DefaultTimingManager::nestTimer(void *handle, const void *id, + function_ref nameBuilder) { + return static_cast(handle)->nest(id, std::move(nameBuilder)); +} + +void DefaultTimingManager::hideTimer(void *handle) { + static_cast(handle)->hidden = true; +} + +//===----------------------------------------------------------------------===// +// DefaultTimingManager Command Line Options +//===----------------------------------------------------------------------===// + +namespace { +struct DefaultTimingManagerOptions { + llvm::cl::opt timing{"mlir-timing", + llvm::cl::desc("Display execution times"), + llvm::cl::init(false)}; + llvm::cl::opt displayMode{ + "mlir-timing-display", llvm::cl::desc("Display method for timing data"), + llvm::cl::init(DisplayMode::Tree), + llvm::cl::values( + clEnumValN(DisplayMode::List, "list", + "display the results in a list sorted by total time"), + clEnumValN(DisplayMode::Tree, "tree", + "display the results ina with a nested tree view"))}; +}; +} // end anonymous namespace + +static llvm::ManagedStatic options; + +void mlir::registerDefaultTimingManagerCLOptions() { + // Make sure that the options struct has been constructed. + *options; +} + +void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) { + if (!options.isConstructed()) + return; + tm.setEnabled(options->timing); + tm.setDisplayMode(options->displayMode); +} diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir --- a/mlir/test/Pass/pass-timing.mlir +++ b/mlir/test/Pass/pass-timing.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=false -test-pm-nested-pipeline -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=NESTED_MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_MT_PIPELINE %s -// LIST: Pass execution timing report +// LIST: Execution time report // LIST: Total Execution Time: // LIST: Name // LIST-DAG: Canonicalizer @@ -12,18 +12,21 @@ // LIST-DAG: DominanceInfo // LIST: Total -// PIPELINE: Pass execution timing report +// PIPELINE: Execution time report // PIPELINE: Total Execution Time: // PIPELINE: Name +// PIPELINE-NEXT: Parser // PIPELINE-NEXT: 'func' Pipeline // PIPELINE-NEXT: CSE // PIPELINE-NEXT: (A) DominanceInfo // PIPELINE-NEXT: Canonicalizer // PIPELINE-NEXT: CSE // PIPELINE-NEXT: (A) DominanceInfo +// PIPELINE-NEXT: Output +// PIPELINE-NEXT: Rest // PIPELINE-NEXT: Total -// MT_LIST: Pass execution timing report +// MT_LIST: Execution time report // MT_LIST: Total Execution Time: // MT_LIST: Name // MT_LIST-DAG: Canonicalizer @@ -31,20 +34,24 @@ // MT_LIST-DAG: DominanceInfo // MT_LIST: Total -// MT_PIPELINE: Pass execution timing report +// MT_PIPELINE: Execution time report // MT_PIPELINE: Total Execution Time: // MT_PIPELINE: Name +// MT_PIPELINE-NEXT: Parser // MT_PIPELINE-NEXT: 'func' Pipeline // MT_PIPELINE-NEXT: CSE // MT_PIPELINE-NEXT: (A) DominanceInfo // MT_PIPELINE-NEXT: Canonicalizer // MT_PIPELINE-NEXT: CSE // MT_PIPELINE-NEXT: (A) DominanceInfo +// MT_PIPELINE-NEXT: Output +// MT_PIPELINE-NEXT: Rest // MT_PIPELINE-NEXT: Total -// NESTED_MT_PIPELINE: Pass execution timing report +// NESTED_MT_PIPELINE: Execution time report // NESTED_MT_PIPELINE: Total Execution Time: // NESTED_MT_PIPELINE: Name +// NESTED_MT_PIPELINE-NEXT: Parser // NESTED_MT_PIPELINE-NEXT: Pipeline Collection : ['func', 'module'] // NESTED_MT_PIPELINE-NEXT: 'func' Pipeline // NESTED_MT_PIPELINE-NEXT: TestFunctionPass @@ -52,6 +59,8 @@ // NESTED_MT_PIPELINE-NEXT: TestModulePass // NESTED_MT_PIPELINE-NEXT: 'func' Pipeline // NESTED_MT_PIPELINE-NEXT: TestFunctionPass +// NESTED_MT_PIPELINE-NEXT: Output +// NESTED_MT_PIPELINE-NEXT: Rest // NESTED_MT_PIPELINE-NEXT: Total func @foo() { diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir --- a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline='module(test-module-pass,func(test-function-pass)),func(test-function-pass)' -pass-pipeline="func(cse,canonicalize)" -verify-each=false -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck %s -// RUN: mlir-opt %s -test-textual-pm-nested-pipeline -verify-each=false -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK +// RUN: mlir-opt %s -pass-pipeline='module(test-module-pass,func(test-function-pass)),func(test-function-pass)' -pass-pipeline="func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s +// RUN: mlir-opt %s -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK // RUN: not mlir-opt %s -pass-pipeline='module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s // RUN: not mlir-opt %s -pass-pipeline='module(test-module-pass))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s // RUN: not mlir-opt %s -pass-pipeline='module()(' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s