diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -70,6 +70,9 @@
   unsigned getThreadCount() const { return ThreadCount; }
 
+  /// Returns true if the current thread is a worker thread of this thread pool.
+  bool isWorkerThread() const;
+
 private:
   bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
 
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -72,6 +72,14 @@
   CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); });
 }
 
+bool ThreadPool::isWorkerThread() const {
+  std::thread::id CurrentThreadId = std::this_thread::get_id();
+  for (const std::thread &Thread : Threads)
+    if (CurrentThreadId == Thread.get_id())
+      return true;
+  return false;
+}
+
 std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
   /// Wrap the Task in a packaged_task to return a future object.
   PackagedTaskTy PackagedTask(std::move(Task));
diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h
--- a/mlir/include/mlir/IR/MLIRContext.h
+++ b/mlir/include/mlir/IR/MLIRContext.h
@@ -15,6 +15,10 @@
 #include
 #include
 
+namespace llvm {
+class ThreadPool;
+} // end namespace llvm
+
 namespace mlir {
 class AbstractOperation;
 class DebugActionManager;
@@ -114,6 +118,12 @@
     disableMultithreading(!enable);
   }
 
+  /// Return the thread pool owned by this context. This method requires that
+  /// multithreading be enabled within the context, and should generally not be
+  /// used directly. Users should instead prefer the threading utilities within
+  /// Threading.h.
+  llvm::ThreadPool &getThreadPool();
+
   /// Return true if we should attach the operation to diagnostics emitted via
   /// Operation::emit.
   bool shouldPrintOpOnDiagnostic();
diff --git a/mlir/include/mlir/IR/Threading.h b/mlir/include/mlir/IR/Threading.h
new file mode
--- /dev/null
+++ b/mlir/include/mlir/IR/Threading.h
@@ -0,0 +1,153 @@
+//===- Threading.h - MLIR Threading Utilities -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various utilities for multithreaded processing within MLIR.
+// These utilities automatically handle many of the necessary threading
+// conditions, such as properly ordering diagnostics, observing if threading is
+// disabled, etc. These utilities should be used over other threading utilities
+// whenever feasible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_IR_THREADING_H
+#define MLIR_IR_THREADING_H
+
+#include "mlir/IR/Diagnostics.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/ThreadPool.h"
+#include <atomic>
+
+namespace mlir {
+
+/// Invoke the given function on the elements between [begin, end)
+/// asynchronously. If the given function returns a failure when processing any
+/// of the elements, execution is stopped and a failure is returned from this
+/// function. This means that in the case of failure, not all elements of the
+/// range will be processed. Diagnostics emitted during processing are ordered
+/// relative to the element's position within [begin, end). If the provided
+/// context does not have multi-threading enabled, this function always
+/// processes elements sequentially.
+template <typename IteratorT, typename FuncT>
+LogicalResult failableParallelForEach(MLIRContext *context, IteratorT begin,
+                                      IteratorT end, FuncT &&func) {
+  unsigned numElements = static_cast<unsigned>(std::distance(begin, end));
+  if (numElements == 0)
+    return success();
+
+  // If multithreading is disabled or there is a small number of elements,
+  // process the elements directly on this thread.
+  // FIXME: ThreadPool should allow work stealing to avoid deadlocks when
+  // scheduling work within a worker thread.
+  if (!context->isMultithreadingEnabled() || numElements <= 1 ||
+      context->getThreadPool().isWorkerThread()) {
+    for (; begin != end; ++begin)
+      if (failed(func(*begin)))
+        return failure();
+    return success();
+  }
+
+  // Build a wrapper processing function that properly initializes a parallel
+  // diagnostic handler.
+  ParallelDiagnosticHandler handler(context);
+  std::atomic<unsigned> curIndex(0);
+  std::atomic<bool> processingFailed(false);
+  auto processFn = [&] {
+    while (!processingFailed) {
+      unsigned index = curIndex++;
+      if (index >= numElements)
+        break;
+      handler.setOrderIDForThread(index);
+      if (failed(func(*std::next(begin, index))))
+        processingFailed = true;
+      handler.eraseOrderIDForThread();
+    }
+  };
+
+  // Otherwise, process the elements in parallel.
+  llvm::ThreadPool &threadPool = context->getThreadPool();
+  size_t numActions = std::min(numElements, threadPool.getThreadCount());
+  SmallVector<std::shared_future<void>> threadFutures;
+  threadFutures.reserve(numActions - 1);
+  for (unsigned i = 1; i < numActions; ++i)
+    threadFutures.emplace_back(threadPool.async(processFn));
+  processFn();
+
+  // Wait for all of the threads to finish.
+  for (std::shared_future<void> &future : threadFutures)
+    future.wait();
+  return failure(processingFailed);
+}
+
+/// Invoke the given function on the elements in the provided range
+/// asynchronously. If the given function returns a failure when processing any
+/// of the elements, execution is stopped and a failure is returned from this
+/// function. This means that in the case of failure, not all elements of the
+/// range will be processed. Diagnostics emitted during processing are ordered
+/// relative to the element's position within the range. If the provided context
+/// does not have multi-threading enabled, this function always processes
+/// elements sequentially.
+template <typename RangeT, typename FuncT>
+LogicalResult failableParallelForEach(MLIRContext *context, RangeT &&range,
+                                      FuncT &&func) {
+  return failableParallelForEach(context, std::begin(range), std::end(range),
+                                 std::forward<FuncT>(func));
+}
+
+/// Invoke the given function on the elements between [begin, end)
+/// asynchronously. If the given function returns a failure when processing any
+/// of the elements, execution is stopped and a failure is returned from this
+/// function. This means that in the case of failure, not all elements of the
+/// range will be processed. Diagnostics emitted during processing are ordered
+/// relative to the element's position within [begin, end). If the provided
+/// context does not have multi-threading enabled, this function always
+/// processes elements sequentially.
+template <typename FuncT>
+LogicalResult failableParallelForEachN(MLIRContext *context, size_t begin,
+                                       size_t end, FuncT &&func) {
+  return failableParallelForEach(context, llvm::seq(begin, end),
+                                 std::forward<FuncT>(func));
+}
+
+/// Invoke the given function on the elements between [begin, end)
+/// asynchronously. Diagnostics emitted during processing are ordered relative
+/// to the element's position within [begin, end). If the provided context does
+/// not have multi-threading enabled, this function always processes elements
+/// sequentially.
+template <typename IteratorT, typename FuncT>
+void parallelForEach(MLIRContext *context, IteratorT begin, IteratorT end,
+                     FuncT &&func) {
+  (void)failableParallelForEach(context, begin, end, [&](auto &&value) {
+    return func(std::forward<decltype(value)>(value)), success();
+  });
+}
+
+/// Invoke the given function on the elements in the provided range
+/// asynchronously. Diagnostics emitted during processing are ordered relative
+/// to the element's position within the range. If the provided context does not
+/// have multi-threading enabled, this function always processes elements
+/// sequentially.
+template <typename RangeT, typename FuncT>
+void parallelForEach(MLIRContext *context, RangeT &&range, FuncT &&func) {
+  parallelForEach(context, std::begin(range), std::end(range),
+                  std::forward<FuncT>(func));
+}
+
+/// Invoke the given function on the elements between [begin, end)
+/// asynchronously. Diagnostics emitted during processing are ordered relative
+/// to the element's position within [begin, end). If the provided context does
+/// not have multi-threading enabled, this function always processes elements
+/// sequentially.
+template <typename FuncT>
+void parallelForEachN(MLIRContext *context, size_t begin, size_t end,
+                      FuncT &&func) {
+  parallelForEach(context, llvm::seq(begin, end), std::forward<FuncT>(func));
+}
+
+} // end namespace mlir
+
+#endif // MLIR_IR_THREADING_H
diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp
--- a/mlir/lib/IR/MLIRContext.cpp
+++ b/mlir/lib/IR/MLIRContext.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RWMutex.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
 #include
@@ -260,6 +261,9 @@
   // Other
   //===--------------------------------------------------------------------===//
 
+  /// The thread pool to use when processing MLIR tasks in parallel.
+  llvm::ThreadPool threadPool;
+
   /// This is a list of dialects that are created referring to this context.
   /// The MLIRContext owns the objects.
   DenseMap<StringRef, std::unique_ptr<Dialect>> loadedDialects;
@@ -571,6 +575,12 @@
   impl->typeUniquer.disableMultithreading(disable);
 }
 
+llvm::ThreadPool &MLIRContext::getThreadPool() {
+  assert(isMultithreadingEnabled() &&
+         "expected multi-threading to be enabled within the context");
+  return impl->threadPool;
+}
+
 void MLIRContext::enterMultiThreadedExecution() {
 #ifndef NDEBUG
   ++impl->multiThreadedExecutionContext;
diff --git a/mlir/lib/IR/Verifier.cpp b/mlir/lib/IR/Verifier.cpp
--- a/mlir/lib/IR/Verifier.cpp
+++ b/mlir/lib/IR/Verifier.cpp
@@ -30,6 +30,7 @@
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/RegionKindInterface.h"
+#include "mlir/IR/Threading.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Parallel.h"
@@ -43,11 +44,6 @@
 /// This class encapsulates all the state used to verify an operation region.
 class OperationVerifier {
 public:
-  explicit OperationVerifier(MLIRContext *context)
-      // TODO: Re-enable parallelism once deadlocks found in D104207 are
-      // resolved.
-      : parallelismEnabled(false) {}
-
   /// Verify the given operation.
   LogicalResult verifyOpAndDominance(Operation &op);
 
@@ -66,9 +62,6 @@
   /// Operation.
   LogicalResult verifyDominanceOfContainedRegions(Operation &op,
                                                   DominanceInfo &domInfo);
-
-  /// This is true if parallelism is enabled on the MLIRContext.
-  const bool parallelismEnabled;
 };
 
 } // end anonymous namespace
 
@@ -91,28 +84,9 @@
 
   // Check the dominance properties and invariants of any operations in the
   // regions contained by the 'opsWithIsolatedRegions' operations.
-  if (!parallelismEnabled || opsWithIsolatedRegions.size() <= 1) {
-    // If parallelism is disabled or if there is only 0/1 operation to do, use
-    // a simple non-parallel loop.
-    for (Operation *op : opsWithIsolatedRegions) {
-      if (failed(verifyOpAndDominance(*op)))
-        return failure();
-    }
-  } else {
-    // Otherwise, verify the operations and their bodies in parallel.
-    ParallelDiagnosticHandler handler(op.getContext());
-    std::atomic<bool> passFailed(false);
-    llvm::parallelForEachN(0, opsWithIsolatedRegions.size(), [&](size_t opIdx) {
-      handler.setOrderIDForThread(opIdx);
-      if (failed(verifyOpAndDominance(*opsWithIsolatedRegions[opIdx])))
-        passFailed = true;
-      handler.eraseOrderIDForThread();
-    });
-    if (passFailed)
-      return failure();
-  }
-
-  return success();
+  return failableParallelForEach(
+      op.getContext(), opsWithIsolatedRegions,
+      [&](Operation *op) { return verifyOpAndDominance(*op); });
 }
 
 /// Returns true if this block may be valid without terminator. That is if:
@@ -378,5 +352,5 @@
 /// compiler bugs. On error, this reports the error through the MLIRContext and
 /// returns failure.
 LogicalResult mlir::verify(Operation *op) {
-  return OperationVerifier(op->getContext()).verifyOpAndDominance(*op);
+  return OperationVerifier().verifyOpAndDominance(*op);
 }
diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp
--- a/mlir/lib/Pass/Pass.cpp
+++ b/mlir/lib/Pass/Pass.cpp
@@ -14,6 +14,7 @@
 #include "PassDetail.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/Dialect.h"
+#include "mlir/IR/Threading.h"
 #include "mlir/IR/Verifier.h"
 #include "mlir/Support/FileUtilities.h"
 #include "llvm/ADT/STLExtras.h"
@@ -580,61 +581,40 @@
     }
   }
 
-  // A parallel diagnostic handler that provides deterministic diagnostic
-  // ordering.
-  ParallelDiagnosticHandler diagHandler(&getContext());
-
-  // An index for the current operation/analysis manager pair.
-  std::atomic<unsigned> opIt(0);
-
   // Get the current thread for this adaptor.
   PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(),
                                                         this};
   auto *instrumentor = am.getPassInstrumentor();
 
   // An atomic failure variable for the async executors.
-  std::atomic<bool> passFailed(false);
-  llvm::parallelForEach(
-      asyncExecutors.begin(),
-      std::next(asyncExecutors.begin(),
-                std::min(asyncExecutors.size(), opAMPairs.size())),
-      [&](MutableArrayRef<OpPassManager> pms) {
-        for (auto e = opAMPairs.size(); !passFailed && opIt < e;) {
-          // Get the next available operation index.
-          unsigned nextID = opIt++;
-          if (nextID >= e)
-            break;
-
-          // Set the order id for this thread in the diagnostic handler.
-          diagHandler.setOrderIDForThread(nextID);
-
-          // Get the pass manager for this operation and execute it.
-          auto &it = opAMPairs[nextID];
-          auto *pm = findPassManagerFor(
-              pms, it.first->getName().getIdentifier(), getContext());
-          assert(pm && "expected valid pass manager for operation");
-
-          unsigned initGeneration = pm->impl->initializationGeneration;
-          LogicalResult pipelineResult =
-              runPipeline(pm->getPasses(), it.first, it.second, verifyPasses,
-                          initGeneration, instrumentor, &parentInfo);
-
-          // Drop this thread from being tracked by the diagnostic handler.
-          // After this task has finished, the thread may be used outside of
-          // this pass manager context meaning that we don't want to track
-          // diagnostics from it anymore.
-          diagHandler.eraseOrderIDForThread();
-
-          // Handle a failed pipeline result.
-          if (failed(pipelineResult)) {
-            passFailed = true;
-            break;
-          }
-        }
-      });
+  std::vector<std::atomic<bool>> activePMs(asyncExecutors.size());
+  std::fill(activePMs.begin(), activePMs.end(), false);
+  auto processFn = [&](auto &opPMPair) {
+    // Find a pass manager for this operation.
+    auto it = llvm::find_if(activePMs, [](std::atomic<bool> &isActive) {
+      bool expectedInactive = false;
+      return isActive.compare_exchange_strong(expectedInactive, true);
+    });
+    unsigned pmIndex = it - activePMs.begin();
+
+    // Get the pass manager for this operation and execute it.
+    auto *pm = findPassManagerFor(asyncExecutors[pmIndex],
+                                  opPMPair.first->getName().getIdentifier(),
+                                  getContext());
+    assert(pm && "expected valid pass manager for operation");
+
+    unsigned initGeneration = pm->impl->initializationGeneration;
+    LogicalResult pipelineResult =
+        runPipeline(pm->getPasses(), opPMPair.first, opPMPair.second,
+                    verifyPasses, initGeneration, instrumentor, &parentInfo);
+
+    // Reset the active bit for this pass manager.
+    activePMs[pmIndex].store(false);
+    return pipelineResult;
+  };
 
   // Signal a failure if any of the executors failed.
-  if (passFailed)
+  if (failed(failableParallelForEach(&getContext(), opAMPairs, processFn)))
     signalPassFailure();
 }
diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp
--- a/mlir/lib/Transforms/Inliner.cpp
+++ b/mlir/lib/Transforms/Inliner.cpp
@@ -15,6 +15,7 @@
 
 #include "PassDetail.h"
 #include "mlir/Analysis/CallGraph.h"
+#include "mlir/IR/Threading.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/InliningUtils.h"
@@ -662,21 +663,9 @@
     return success();
 
   // Optimize each of the nodes within the SCC in parallel.
-  // NOTE: This is simple now, because we don't enable optimizing nodes within
-  // children. When we remove this restriction, this logic will need to be
-  // reworked.
-  if (context->isMultithreadingEnabled() && nodesToVisit.size() > 1) {
-    if (failed(optimizeSCCAsync(nodesToVisit, context)))
+  if (failed(optimizeSCCAsync(nodesToVisit, context)))
       return failure();
 
-    // Otherwise, we are optimizing within a single thread.
-  } else {
-    for (CallGraphNode *node : nodesToVisit) {
-      if (failed(optimizeCallable(node, opPipelines[0])))
-        return failure();
-    }
-  }
-
   // Recompute the uses held by each of the nodes.
   for (CallGraphNode *node : nodesToVisit)
     useList.recomputeUses(node, cg);
@@ -685,7 +674,7 @@
 
 LogicalResult
 InlinerPass::optimizeSCCAsync(MutableArrayRef<CallGraphNode *> nodesToVisit,
-                              MLIRContext *context) {
+                              MLIRContext *ctx) {
   // Ensure that there are enough pipeline maps for the optimizer to run in
   // parallel. Note: The number of pass managers here needs to remain constant
   // to prevent issues with pass instrumentations that rely on having the same
@@ -703,35 +692,24 @@
   for (CallGraphNode *node : nodesToVisit)
     getAnalysisManager().nest(node->getCallableRegion()->getParentOp());
 
-  // An index for the current node to optimize.
-  std::atomic<unsigned> nodeIt(0);
-
-  // Optimize the nodes of the SCC in parallel.
-  ParallelDiagnosticHandler optimizerHandler(context);
-  std::atomic<bool> passFailed(false);
-  llvm::parallelForEach(
-      opPipelines.begin(), std::next(opPipelines.begin(), numThreads),
-      [&](llvm::StringMap<OpPassManager> &pipelines) {
-        for (auto e = nodesToVisit.size(); !passFailed && nodeIt < e;) {
-          // Get the next available operation index.
-          unsigned nextID = nodeIt++;
-          if (nextID >= e)
-            break;
-
-          // Set the order for this thread so that diagnostics will be
-          // properly ordered, and reset after optimization has finished.
-          optimizerHandler.setOrderIDForThread(nextID);
-          LogicalResult pipelineResult =
-              optimizeCallable(nodesToVisit[nextID], pipelines);
-          optimizerHandler.eraseOrderIDForThread();
-
-          if (failed(pipelineResult)) {
-            passFailed = true;
-            break;
-          }
-        }
-      });
-  return failure(passFailed);
+  // An atomic failure variable for the async executors.
+  std::vector<std::atomic<bool>> activePMs(opPipelines.size());
+  std::fill(activePMs.begin(), activePMs.end(), false);
+  return failableParallelForEach(ctx, nodesToVisit, [&](CallGraphNode *node) {
+    // Find a pass manager for this operation.
+    auto it = llvm::find_if(activePMs, [](std::atomic<bool> &isActive) {
+      bool expectedInactive = false;
+      return isActive.compare_exchange_strong(expectedInactive, true);
+    });
+    unsigned pmIndex = it - activePMs.begin();
+
+    // Optimize this callable node.
+    LogicalResult result = optimizeCallable(node, opPipelines[pmIndex]);
+
+    // Reset the active bit for this pass manager.
+    activePMs[pmIndex].store(false);
+    return result;
+  });
 }
 
 LogicalResult
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
--- a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -compose-maps 2>&1 | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -compose-maps -split-input-file 2>&1 | FileCheck %s
 
 // For all these cases, the test traverses the `test_affine_map` ops and
 // composes them in order one-by-one.
@@ -16,6 +16,8 @@
   return
 }
 
+// -----
+
 func @simple2() {
   // CHECK: Composed map: (d0)[s0, s1] -> (d0 - s0 + s1)
   "test_affine_map"() { affine_map = affine_map<(d0)[s0] -> (d0 + s0 - 1)> } : () -> ()
@@ -23,6 +25,8 @@
   return
 }
 
+// -----
+
 func @simple3a() {
   // CHECK: Composed map: (d0, d1)[s0, s1, s2, s3] -> ((d0 ceildiv s2) * s0, (d1 ceildiv s3) * s1)
   "test_affine_map"() { affine_map = affine_map<(d0, d1)[s0, s1] -> (d0 ceildiv s0, d1 ceildiv s1)> } : () -> ()
@@ -30,12 +34,16 @@
   return
 }
 
+// -----
+
 func @simple3b() {
   // CHECK: Composed map: (d0, d1)[s0, s1] -> (d0 mod s0, d1 mod s1)
   "test_affine_map"() { affine_map = affine_map<(d0, d1)[s0, s1] -> (d0 mod s0, d1 mod s1)> } : () -> ()
   return
 }
 
+// -----
+
 func @simple3c() {
   // CHECK: Composed map: (d0, d1)[s0, s1, s2, s3, s4, s5] -> ((d0 ceildiv s4) * s4 + d0 mod s2, (d1 ceildiv s5) * s5 + d1 mod s3)
   "test_affine_map"() { affine_map = affine_map<(d0, d1)[s0, s1] -> ((d0 ceildiv s0) * s0, (d1 ceildiv s1) * s1, d0, d1)> } : () -> ()
@@ -43,6 +51,8 @@
   return
 }
 
+// -----
+
 func @simple4() {
   // CHECK: Composed map: (d0, d1)[s0, s1] -> (d1 * s1, d0 ceildiv s0)
   "test_affine_map"() { affine_map = affine_map<(d0, d1) -> (d1, d0)> } : () -> ()
@@ -50,6 +60,8 @@
   return
 }
 
+// -----
+
 func @simple5a() {
   // CHECK: Composed map: (d0) -> (d0 * 3 + 18)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -59,6 +71,8 @@
   return
 }
 
+// -----
+
 func @simple5b() {
   // CHECK: Composed map: (d0) -> ((d0 + 6) ceildiv 2)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -68,6 +82,8 @@
   return
 }
 
+// -----
+
 func @simple5c() {
   // CHECK: Composed map: (d0) -> (d0 * 8 + 48)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -77,6 +93,8 @@
   return
 }
 
+// -----
+
 func @simple5d() {
   // CHECK: Composed map: (d0) -> ((d0 * 4) floordiv 3 + 8)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -86,6 +104,8 @@
   return
 }
 
+// -----
+
 func @simple5e() {
   // CHECK: Composed map: (d0) -> ((d0 + 6) ceildiv 8)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -94,6 +114,8 @@
   return
 }
 
+// -----
+
 func @simple5f() {
   // CHECK: Composed map: (d0) -> ((d0 * 4 - 4) floordiv 3)
   "test_affine_map"() { affine_map = affine_map<(d0) -> (d0 - 1)> } : () -> ()
@@ -102,6 +124,8 @@
   return
 }
 
+// -----
+
 func @perm_and_proj() {
   // CHECK: Composed map: (d0, d1, d2, d3) -> (d1, d3, d0)
   "test_affine_map"() { affine_map = affine_map<(d0, d1, d2, d3) -> (d3, d1, d2, d0)> } : () -> ()
@@ -109,6 +133,8 @@
   return
 }
 
+// -----
+
 func @symbols1() {
   // CHECK: Composed map: (d0)[s0] -> (d0 + s0 + 1, d0 - s0 - 1)
   "test_affine_map"() { affine_map = affine_map<(d0)[s0] -> (d0 + s0, d0 - s0)> } : () -> ()
@@ -116,6 +142,8 @@
   return
 }
 
+// -----
+
 func @drop() {
   // CHECK: Composed map: (d0, d1, d2)[s0, s1] -> (d0 * 2 + d1 + d2 + s1)
   "test_affine_map"() { affine_map = affine_map<(d0, d1, d2)[s0, s1] -> (d0 + s1, d1 + s0, d0 + d1 + d2)> } : () -> ()
@@ -123,6 +151,8 @@
   return
 }
 
+// -----
+
 func @multi_symbols() {
   // CHECK: Composed map: (d0)[s0, s1, s2] -> (d0 + s1 + s2 + 1, d0 - s0 - s2 - 1)
   "test_affine_map"() { affine_map = affine_map<(d0)[s0] -> (d0 + s0, d0 - s0)> } : () -> ()
diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir
--- a/mlir/test/Dialect/Affine/slicing-utils.mlir
+++ b/mlir/test/Dialect/Affine/slicing-utils.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -forward-slicing=true 2>&1 | FileCheck %s --check-prefix=FWD
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -backward-slicing=true 2>&1 | FileCheck %s --check-prefix=BWD
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -slicing=true 2>&1 | FileCheck %s --check-prefix=FWDBWD
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -forward-slicing=true 2>&1 | FileCheck %s --check-prefix=FWD
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -backward-slicing=true 2>&1 | FileCheck %s --check-prefix=BWD
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -slicing=true 2>&1 | FileCheck %s --check-prefix=FWDBWD
 
 /// 1 2 3 4
 /// |_______| |______|
@@ -217,6 +217,8 @@
   return
 }
 
+// -----
+
 // FWD-LABEL: slicing_test_2
 // BWD-LABEL: slicing_test_2
 // FWDBWD-LABEL: slicing_test_2
@@ -250,6 +252,8 @@
   return
 }
 
+// -----
+
 // FWD-LABEL: slicing_test_3
 // BWD-LABEL: slicing_test_3
 // FWDBWD-LABEL: slicing_test_3
@@ -265,6 +269,8 @@
   return
 }
 
+// -----
+
 // FWD-LABEL: slicing_test_function_argument
 // BWD-LABEL: slicing_test_function_argument
 // FWDBWD-LABEL: slicing_test_function_argument
@@ -274,6 +280,8 @@
   return %0 : index
 }
 
+// -----
+
 // FWD-LABEL: slicing_test_multiple_return
 // BWD-LABEL: slicing_test_multiple_return
 // FWDBWD-LABEL: slicing_test_multiple_return
diff --git a/mlir/test/IR/diagnostic-handler-filter.mlir b/mlir/test/IR/diagnostic-handler-filter.mlir
--- a/mlir/test/IR/diagnostic-handler-filter.mlir
+++ b/mlir/test/IR/diagnostic-handler-filter.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-diagnostic-filter='filters=mysource1' -o - 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -test-diagnostic-filter='filters=mysource1' -split-input-file -o - 2>&1 | FileCheck %s
 // This test verifies that diagnostic handler can emit the call stack successfully.
 
 // CHECK-LABEL: Test 'test1'
@@ -8,6 +8,8 @@
   test.loc = loc(callsite("foo"("mysource1":0:0) at callsite("mysource2":1:0 at "mysource3":2:0)))
 }
 
+// -----
+
 // CHECK-LABEL: Test 'test2'
 // CHECK-NEXT: mysource1:0:0: error: test diagnostic
 func private @test2() attributes {
diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir
--- a/mlir/test/Pass/pass-timing.mlir
+++ b/mlir/test/Pass/pass-timing.mlir
@@ -2,7 +2,7 @@
 // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s
 // RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s
 // RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s
-// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_MT_PIPELINE %s
+// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_PIPELINE %s
 
 // LIST: Execution time report
 // LIST: Total Execution Time:
@@ -48,20 +48,20 @@
 // MT_PIPELINE-NEXT: Rest
 // MT_PIPELINE-NEXT: Total
 
-// NESTED_MT_PIPELINE: Execution time report
-// NESTED_MT_PIPELINE: Total Execution Time:
-// NESTED_MT_PIPELINE: Name
-// NESTED_MT_PIPELINE-NEXT: Parser
-// NESTED_MT_PIPELINE-NEXT: Pipeline Collection : ['func', 'module']
-// NESTED_MT_PIPELINE-NEXT: 'func' Pipeline
-// NESTED_MT_PIPELINE-NEXT: TestFunctionPass
-// NESTED_MT_PIPELINE-NEXT: 'module' Pipeline
-// NESTED_MT_PIPELINE-NEXT: TestModulePass
-// NESTED_MT_PIPELINE-NEXT: 'func' Pipeline
-// NESTED_MT_PIPELINE-NEXT: TestFunctionPass
-// NESTED_MT_PIPELINE-NEXT: Output
-// NESTED_MT_PIPELINE-NEXT: Rest
-// NESTED_MT_PIPELINE-NEXT: Total
+// NESTED_PIPELINE: Execution time report
+// NESTED_PIPELINE: Total Execution Time:
+// NESTED_PIPELINE: Name
+// NESTED_PIPELINE-NEXT: Parser
+// NESTED_PIPELINE-NEXT: Pipeline Collection : ['func', 'module']
+// NESTED_PIPELINE-NEXT: 'func' Pipeline
+// NESTED_PIPELINE-NEXT: TestFunctionPass
+// NESTED_PIPELINE-NEXT: 'module' Pipeline
+// NESTED_PIPELINE-NEXT: TestModulePass
+// NESTED_PIPELINE-NEXT: 'func' Pipeline
+// NESTED_PIPELINE-NEXT: TestFunctionPass
+// NESTED_PIPELINE-NEXT: Output
+// NESTED_PIPELINE-NEXT: Rest
+// NESTED_PIPELINE-NEXT: Total
 
 func @foo() {
   return
diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir
--- a/mlir/test/Pass/pipeline-parsing.mlir
+++ b/mlir/test/Pass/pipeline-parsing.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -pass-pipeline='module(test-module-pass,func(test-function-pass)),func(test-function-pass)' -pass-pipeline="func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s
-// RUN: mlir-opt %s -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK
+// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='module(test-module-pass,func(test-function-pass)),func(test-function-pass)' -pass-pipeline="func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s
+// RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK
 // RUN: not mlir-opt %s -pass-pipeline='module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s
 // RUN: not mlir-opt %s -pass-pipeline='module(test-module-pass))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s
 // RUN: not mlir-opt %s -pass-pipeline='module()(' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s
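
A minimal usage sketch of the new failableParallelForEach API introduced in mlir/include/mlir/IR/Threading.h. This is an illustration only and not part of the patch: `processAll` and `processOp` are hypothetical names standing in for a client's per-element work; only `failableParallelForEach` itself comes from this change.

#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Threading.h"
#include "llvm/ADT/ArrayRef.h"

using namespace mlir;

// Hypothetical per-element work; stands in for whatever verification or
// rewriting a client performs on each operation.
static LogicalResult processOp(Operation *op) { return success(); }

// Runs processOp over all operations. The elements are processed in parallel
// when the context has multithreading enabled (and the caller is not already a
// worker thread of the context's pool), otherwise sequentially. Diagnostics
// stay ordered by each operation's position in `ops`, and the first failure
// stops further scheduling.
static LogicalResult processAll(MLIRContext *ctx, llvm::ArrayRef<Operation *> ops) {
  return failableParallelForEach(
      ctx, ops, [](Operation *op) { return processOp(op); });
}

The index-based variants follow the same pattern, e.g. `failableParallelForEachN(ctx, 0, numOps, callback)` forwards to the range form over `llvm::seq`.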