diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
--- a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
@@ -29,6 +29,10 @@
 ArrayRef lbHandles, ArrayRef ubHandles, int64_t step);
+LoopBuilder makeAffineLoopBuilder(OperationHandle *loop, ValueHandle *iv,
+                                  ArrayRef lbHandles,
+                                  ArrayRef ubHandles,
+                                  int64_t step);
 /// Explicit nested LoopBuilder. Offers a compressed multi-loop builder to avoid
 /// explicitly writing all the loops in a nest. This simple functionality is
@@ -60,8 +64,14 @@
 /// and `min` constraints respectively.
 AffineLoopNestBuilder(ValueHandle *iv, ArrayRef lbs, ArrayRef ubs, int64_t step);
+  AffineLoopNestBuilder(OperationHandle *loopHandle, ValueHandle *iv,
+                        ArrayRef lbs, ArrayRef ubs,
+                        int64_t step);
 AffineLoopNestBuilder(ArrayRef ivs, ArrayRef lbs, ArrayRef ubs, ArrayRef steps);
+  AffineLoopNestBuilder(ArrayRef loopHandles,
+                        ArrayRef ivs, ArrayRef lbs,
+                        ArrayRef ubs, ArrayRef steps);
 void operator()(function_ref fun = nullptr);
diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
--- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
@@ -47,6 +47,10 @@
 LoopRangeBuilder(ValueHandle *iv, ValueHandle range);
 LoopRangeBuilder(ValueHandle *iv, Value range);
 LoopRangeBuilder(ValueHandle *iv, SubViewOp::Range range);
+  LoopRangeBuilder(OperationHandle *loop, ValueHandle *iv, ValueHandle range);
+  LoopRangeBuilder(OperationHandle *loop, ValueHandle *iv, Value range);
+  LoopRangeBuilder(OperationHandle *loop, ValueHandle *iv,
+                   SubViewOp::Range range);
 LoopRangeBuilder(const LoopRangeBuilder &) = delete;
 LoopRangeBuilder(LoopRangeBuilder &&) = default;
@@ -65,13 +69,20 @@
 /// directly. In the current implementation it produces loop.for operations.
 class LoopNestRangeBuilder {
 public:
-  LoopNestRangeBuilder(ArrayRef ivs,
-                       ArrayRef ranges);
-  LoopNestRangeBuilder(ArrayRef ivs,
-                       ArrayRef ranges);
-  LoopNestRangeBuilder(ArrayRef ivs,
+  LoopNestRangeBuilder(ArrayRef ivs,
+                       ArrayRef ranges);
+  LoopNestRangeBuilder(ArrayRef ivs, ArrayRef ranges);
+  LoopNestRangeBuilder(ArrayRef ivs, ArrayRef ranges);
-  edsc::ValueHandle operator()(std::function fun = nullptr);
+  LoopNestRangeBuilder(ArrayRef loop,
+                       ArrayRef ivs,
+                       ArrayRef ranges);
+  LoopNestRangeBuilder(ArrayRef loop,
+                       ArrayRef ivs, ArrayRef ranges);
+  LoopNestRangeBuilder(ArrayRef loop,
+                       ArrayRef ivs,
+                       ArrayRef ranges);
+  ValueHandle operator()(std::function fun = nullptr);
 private:
 SmallVector loops;
@@ -81,7 +92,10 @@
 /// ranges.
 template class GenericLoopNestRangeBuilder {
 public:
-  GenericLoopNestRangeBuilder(ArrayRef ivs,
+  GenericLoopNestRangeBuilder(ArrayRef ivs,
+                              ArrayRef ranges);
+  GenericLoopNestRangeBuilder(ArrayRef loops,
+                              ArrayRef ivs,
                               ArrayRef ranges);
 void operator()(std::function fun = nullptr) { (*builder)(fun); }
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h
@@ -70,6 +70,13 @@
 PatternRewriter &rewriter, Operation *op, ArrayRef sizes, ArrayRef operandIndicesToFuse, StringRef linalgMarker);
+using LinalgLoops = SmallVector;
+
+/// Emits a loop nest with the proper body for `op`.
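+/// Returns the generated loop ops on success and llvm::None on failure. A
+/// hypothetical usage sketch (the op and loop types below are illustrative
+/// only; loop::ForOp and MatmulOp stand in for any instantiated combination):
+///   Optional<LinalgLoops> loops =
+///       linalgLowerOpToLoops<loop::ForOp, MatmulOp>(rewriter, op);
+///   if (!loops)
+///     return failure();
+///   for (Operation *loopOp : *loops)
+///     (void)loopOp; // e.g. inspect or annotate each generated loop op.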
+template +Optional linalgLowerOpToLoops(PatternRewriter &rewriter, + ConcreteOp op); + /// Emits a loop nest of `loop.for` with the proper body for `op`. template LogicalResult linalgOpToLoops(PatternRewriter &rewriter, Operation *op); diff --git a/mlir/include/mlir/Dialect/LoopOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/LoopOps/EDSC/Builders.h --- a/mlir/include/mlir/Dialect/LoopOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/LoopOps/EDSC/Builders.h @@ -29,11 +29,26 @@ ArrayRef lbHandles, ArrayRef ubHandles, ArrayRef steps); +/// Constructs and captures a new loop::ParallelOp. Captures the associated +/// induction variables. An array of ValueHandle pointers is passed as the first +/// argument and is the *only* way to capture loop induction variables. +LoopBuilder makeParallelLoopBuilder(OperationHandle *loop, + ArrayRef ivs, + ArrayRef lbHandles, + ArrayRef ubHandles, + ArrayRef steps); + /// Constructs a new loop::ForOp and captures the associated induction /// variable. A ValueHandle pointer is passed as the first argument and is the /// *only* way to capture the loop induction variable. LoopBuilder makeLoopBuilder(ValueHandle *iv, ValueHandle lbHandle, ValueHandle ubHandle, ValueHandle stepHandle); +/// Constructs and captures a new loop::ForOp. Also captures the associated +/// induction variable. A ValueHandle pointer is passed as the first argument +/// and is the *only* way to capture the loop induction variable. +LoopBuilder makeLoopBuilder(OperationHandle *loop, ValueHandle *iv, + ValueHandle lbHandle, ValueHandle ubHandle, + ValueHandle stepHandle); /// Helper class to sugar building loop.parallel loop nests from lower/upper /// bounds and step sizes. @@ -42,6 +57,9 @@ ParallelLoopNestBuilder(ArrayRef ivs, ArrayRef lbs, ArrayRef ubs, ArrayRef steps); + ParallelLoopNestBuilder(OperationHandle *loop, ArrayRef ivs, + ArrayRef lbs, ArrayRef ubs, + ArrayRef steps); void operator()(function_ref fun = nullptr); @@ -54,7 +72,10 @@ /// loop.for. class LoopNestBuilder { public: - LoopNestBuilder(ArrayRef ivs, ArrayRef lbs, + LoopNestBuilder(ArrayRef loop, ArrayRef ivs, + ArrayRef lbs, ArrayRef ubs, + ArrayRef steps); + LoopNestBuilder(ArrayRef ivs, ArrayRef lbs, ArrayRef ubs, ArrayRef steps); void operator()(std::function fun = nullptr); diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h --- a/mlir/include/mlir/EDSC/Builders.h +++ b/mlir/include/mlir/EDSC/Builders.h @@ -26,6 +26,7 @@ class BlockHandle; class CapturableHandle; class NestedBuilder; +class OperationHandle; class ValueHandle; /// Helper class to transparently handle builder insertion points by RAII. @@ -76,143 +77,6 @@ // already be something available in LLVM for this purpose. }; -/// A NestedBuilder is a scoping abstraction to create an idiomatic syntax -/// embedded in C++ that serves the purpose of building nested MLIR. -/// Nesting and compositionality is obtained by using the strict ordering that -/// exists between object construction and method invocation on said object (in -/// our case, the call to `operator()`). -/// This ordering allows implementing an abstraction that decouples definition -/// from declaration (in a PL sense) on placeholders of type ValueHandle and -/// BlockHandle. 
-class NestedBuilder { -protected: - NestedBuilder() = default; - NestedBuilder(const NestedBuilder &) = delete; - NestedBuilder(NestedBuilder &&other) : bodyScope(other.bodyScope) { - other.bodyScope = nullptr; - } - - NestedBuilder &operator=(const NestedBuilder &) = delete; - NestedBuilder &operator=(NestedBuilder &&other) { - std::swap(bodyScope, other.bodyScope); - return *this; - } - - /// Enter an mlir::Block and setup a ScopedContext to insert operations at - /// the end of it. Since we cannot use c++ language-level scoping to implement - /// scoping itself, we use enter/exit pairs of operations. - /// As a consequence we must allocate a new OpBuilder + ScopedContext and - /// let the escape. - /// Step back "prev" times from the end of the block to set up the insertion - /// point, which is useful for non-empty blocks. - void enter(mlir::Block *block, int prev = 0) { - bodyScope = new ScopedContext( - ScopedContext::getBuilder(), - OpBuilder::InsertPoint(block, std::prev(block->end(), prev)), - ScopedContext::getLocation()); - bodyScope->nestedBuilder = this; - } - - /// Exit the current mlir::Block by explicitly deleting the dynamically - /// allocated OpBuilder and ScopedContext. - void exit() { - // Reclaim now to exit the scope. - bodyScope->nestedBuilder = nullptr; - delete bodyScope; - bodyScope = nullptr; - } - - /// Custom destructor does nothing because we already destroyed bodyScope - /// manually in `exit`. Insert an assertion to defensively guard against - /// improper usage of scoping. - ~NestedBuilder() { - assert(!bodyScope && - "Illegal use of NestedBuilder; must have called exit()"); - } - -private: - ScopedContext *bodyScope = nullptr; -}; - -/// A LoopBuilder is a generic NestedBuilder for loop-like MLIR operations. -/// More specifically it is meant to be used as a temporary object for -/// representing any nested MLIR construct that is "related to" an mlir::Value -/// (for now an induction variable). -/// This is extensible and will evolve in the future as MLIR evolves, hence -/// the name LoopBuilder (as opposed to say ForBuilder or AffineForBuilder). -class LoopBuilder : public NestedBuilder { -public: - LoopBuilder(const LoopBuilder &) = delete; - LoopBuilder(LoopBuilder &&) = default; - - LoopBuilder &operator=(const LoopBuilder &) = delete; - LoopBuilder &operator=(LoopBuilder &&) = default; - - /// The only purpose of this operator is to serve as a sequence point so that - /// the evaluation of `fun` (which build IR snippets in a scoped fashion) is - /// scoped within a LoopBuilder. - void operator()(function_ref fun = nullptr); - -private: - LoopBuilder() = default; - - friend LoopBuilder makeAffineLoopBuilder(ValueHandle *iv, - ArrayRef lbHandles, - ArrayRef ubHandles, - int64_t step); - friend LoopBuilder makeParallelLoopBuilder(ArrayRef ivs, - ArrayRef lbHandles, - ArrayRef ubHandles, - ArrayRef steps); - friend LoopBuilder makeLoopBuilder(ValueHandle *iv, ValueHandle lbHandle, - ValueHandle ubHandle, - ValueHandle stepHandle); -}; - -// This class exists solely to handle the C++ vexing parse case when -// trying to enter a Block that has already been constructed. -class Append {}; - -/// A BlockBuilder is a NestedBuilder for mlir::Block*. -/// This exists by opposition to LoopBuilder which is not related to an -/// mlir::Block* but to a mlir::Value. -/// It is meant to be used as a temporary object for representing any nested -/// MLIR construct that is "related to" an mlir::Block*. 
-class BlockBuilder : public NestedBuilder {
-public:
-  /// Enters the mlir::Block* previously captured by `bh` and sets the insertion
-  /// point to its end.
-  BlockBuilder(BlockHandle bh, Append);
-
-  /// Constructs a new mlir::Block with argument types derived from `args`.
-  /// Captures the new block in `bh` and its arguments into `args`.
-  /// Enters the new mlir::Block* and sets the insertion point to its end.
-  ///
-  /// Prerequisites:
-  /// The ValueHandle `args` are typed delayed ValueHandles; i.e. they are
-  /// not yet bound to mlir::Value.
-  BlockBuilder(BlockHandle *bh, ArrayRef args);
-
-  /// Constructs a new mlir::Block with argument types derived from `args` and
-  /// appends it as the last block in the region.
-  /// Captures the new block in `bh` and its arguments into `args`.
-  /// Enters the new mlir::Block* and sets the insertion point to its end.
-  ///
-  /// Prerequisites:
-  /// The ValueHandle `args` are typed delayed ValueHandles; i.e. they are
-  /// not yet bound to mlir::Value.
-  BlockBuilder(BlockHandle *bh, Region &region, ArrayRef args);
-
-  /// The only purpose of this operator is to serve as a sequence point so that
-  /// the evaluation of `fun` (which build IR snippets in a scoped fashion) is
-  /// scoped within a BlockBuilder.
-  void operator()(function_ref fun = nullptr);
-
-private:
-  BlockBuilder(BlockBuilder &) = delete;
-  BlockBuilder &operator=(BlockBuilder &other) = delete;
-};
-
 /// Base class for ValueHandle, OperationHandle and BlockHandle.
 /// Not meant to be used outside of these classes.
 class CapturableHandle {
@@ -355,6 +219,158 @@
 Operation *op;
 };
+/// A NestedBuilder is a scoping abstraction to create an idiomatic syntax
+/// embedded in C++ that serves the purpose of building nested MLIR.
+/// Nesting and compositionality is obtained by using the strict ordering that
+/// exists between object construction and method invocation on said object (in
+/// our case, the call to `operator()`).
+/// This ordering allows implementing an abstraction that decouples definition
+/// from declaration (in a PL sense) on placeholders of type ValueHandle and
+/// BlockHandle.
+class NestedBuilder {
+protected:
+  NestedBuilder() = default;
+  NestedBuilder(const NestedBuilder &) = delete;
+  NestedBuilder(NestedBuilder &&other) : bodyScope(other.bodyScope) {
+    other.bodyScope = nullptr;
+  }
+
+  NestedBuilder &operator=(const NestedBuilder &) = delete;
+  NestedBuilder &operator=(NestedBuilder &&other) {
+    std::swap(bodyScope, other.bodyScope);
+    return *this;
+  }
+
+  /// Enter an mlir::Block and set up a ScopedContext to insert operations at
+  /// the end of it. Since we cannot use C++ language-level scoping to
+  /// implement scoping itself, we use enter/exit pairs of operations.
+  /// As a consequence we must allocate a new OpBuilder + ScopedContext and
+  /// let them escape.
+  /// Step back "prev" times from the end of the block to set up the insertion
+  /// point, which is useful for non-empty blocks.
+  void enter(mlir::Block *block, int prev = 0) {
+    bodyScope = new ScopedContext(
+        ScopedContext::getBuilder(),
+        OpBuilder::InsertPoint(block, std::prev(block->end(), prev)),
+        ScopedContext::getLocation());
+    bodyScope->nestedBuilder = this;
+  }
+
+  /// Exit the current mlir::Block by explicitly deleting the dynamically
+  /// allocated OpBuilder and ScopedContext.
+  void exit() {
+    // Reclaim now to exit the scope.
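+    // (Clearing the back-reference before the delete keeps the teardown
+    // ordered: nothing observes a ScopedContext that still points at this
+    // builder while the scope allocated in enter() is reclaimed.)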
+ bodyScope->nestedBuilder = nullptr; + delete bodyScope; + bodyScope = nullptr; + } + + /// Custom destructor does nothing because we already destroyed bodyScope + /// manually in `exit`. Insert an assertion to defensively guard against + /// improper usage of scoping. + ~NestedBuilder() { + assert(!bodyScope && + "Illegal use of NestedBuilder; must have called exit()"); + } + +private: + ScopedContext *bodyScope = nullptr; +}; + +/// A LoopBuilder is a generic NestedBuilder for loop-like MLIR operations. +/// More specifically it is meant to be used as a temporary object for +/// representing any nested MLIR construct that is "related to" an mlir::Value +/// (for now an induction variable). +/// This is extensible and will evolve in the future as MLIR evolves, hence +/// the name LoopBuilder (as opposed to say ForBuilder or AffineForBuilder). +class LoopBuilder : public NestedBuilder { +public: + LoopBuilder(const LoopBuilder &) = delete; + LoopBuilder(LoopBuilder &&) = default; + + LoopBuilder &operator=(const LoopBuilder &) = delete; + LoopBuilder &operator=(LoopBuilder &&) = default; + + /// The only purpose of this operator is to serve as a sequence point so that + /// the evaluation of `fun` (which build IR snippets in a scoped fashion) is + /// scoped within a LoopBuilder. Returns the created loop operation. + void operator()(function_ref fun = nullptr); + +private: + LoopBuilder() = default; + + friend LoopBuilder makeAffineLoopBuilder(ValueHandle *iv, + ArrayRef lbHandles, + ArrayRef ubHandles, + int64_t step); + friend LoopBuilder makeAffineLoopBuilder(OperationHandle *loop, + ValueHandle *iv, + ArrayRef lbHandles, + ArrayRef ubHandles, + int64_t step); + + friend LoopBuilder makeParallelLoopBuilder(ArrayRef ivs, + ArrayRef lbHandles, + ArrayRef ubHandles, + ArrayRef steps); + friend LoopBuilder makeParallelLoopBuilder(OperationHandle *loop, + ArrayRef ivs, + ArrayRef lbHandles, + ArrayRef ubHandles, + ArrayRef steps); + + friend LoopBuilder makeLoopBuilder(ValueHandle *iv, ValueHandle lbHandle, + ValueHandle ubHandle, + ValueHandle stepHandle); + friend LoopBuilder makeLoopBuilder(OperationHandle *loop, ValueHandle *iv, + ValueHandle lbHandle, ValueHandle ubHandle, + ValueHandle stepHandle); +}; + +// This class exists solely to handle the C++ vexing parse case when +// trying to enter a Block that has already been constructed. +class Append {}; + +/// A BlockBuilder is a NestedBuilder for mlir::Block*. +/// This exists by opposition to LoopBuilder which is not related to an +/// mlir::Block* but to a mlir::Value. +/// It is meant to be used as a temporary object for representing any nested +/// MLIR construct that is "related to" an mlir::Block*. +class BlockBuilder : public NestedBuilder { +public: + /// Enters the mlir::Block* previously captured by `bh` and sets the insertion + /// point to its end. + BlockBuilder(BlockHandle bh, Append); + + /// Constructs a new mlir::Block with argument types derived from `args`. + /// Captures the new block in `bh` and its arguments into `args`. + /// Enters the new mlir::Block* and sets the insertion point to its end. + /// + /// Prerequisites: + /// The ValueHandle `args` are typed delayed ValueHandles; i.e. they are + /// not yet bound to mlir::Value. + BlockBuilder(BlockHandle *bh, ArrayRef args); + + /// Constructs a new mlir::Block with argument types derived from `args` and + /// appends it as the last block in the region. + /// Captures the new block in `bh` and its arguments into `args`. 
+ /// Enters the new mlir::Block* and sets the insertion point to its end. + /// + /// Prerequisites: + /// The ValueHandle `args` are typed delayed ValueHandles; i.e. they are + /// not yet bound to mlir::Value. + BlockBuilder(BlockHandle *bh, Region ®ion, ArrayRef args); + + /// The only purpose of this operator is to serve as a sequence point so that + /// the evaluation of `fun` (which build IR snippets in a scoped fashion) is + /// scoped within a BlockBuilder. + void operator()(function_ref fun = nullptr); + +private: + BlockBuilder(BlockBuilder &) = delete; + BlockBuilder &operator=(BlockBuilder &other) = delete; +}; + /// Simple wrapper to build a generic operation without successor blocks. template struct CustomOperation { diff --git a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp --- a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp @@ -14,27 +14,24 @@ using namespace mlir; using namespace mlir::edsc; -static Optional emitStaticFor(ArrayRef lbs, - ArrayRef ubs, - int64_t step) { +static AffineForOp emitStaticFor(ArrayRef lbs, + ArrayRef ubs, int64_t step) { if (lbs.size() != 1 || ubs.size() != 1) - return Optional(); + return nullptr; auto *lbDef = lbs.front().getValue().getDefiningOp(); auto *ubDef = ubs.front().getValue().getDefiningOp(); if (!lbDef || !ubDef) - return Optional(); + return nullptr; auto lbConst = dyn_cast(lbDef); auto ubConst = dyn_cast(ubDef); if (!lbConst || !ubConst) - return Optional(); + return nullptr; - return ValueHandle(ScopedContext::getBuilder() - .create(ScopedContext::getLocation(), - lbConst.getValue(), - ubConst.getValue(), step) - .getInductionVar()); + return ScopedContext::getBuilder().create( + ScopedContext::getLocation(), lbConst.getValue(), ubConst.getValue(), + step); } LoopBuilder mlir::edsc::makeAffineLoopBuilder(ValueHandle *iv, @@ -42,29 +39,69 @@ ArrayRef ubHandles, int64_t step) { mlir::edsc::LoopBuilder result; - if (auto staticFor = emitStaticFor(lbHandles, ubHandles, step)) { - *iv = staticFor.getValue(); - } else { + AffineForOp forOp = emitStaticFor(lbHandles, ubHandles, step); + if (!forOp) { + SmallVector lbs(lbHandles.begin(), lbHandles.end()); + SmallVector ubs(ubHandles.begin(), ubHandles.end()); + auto b = ScopedContext::getBuilder(); + forOp = b.create(ScopedContext::getLocation(), lbs, + b.getMultiDimIdentityMap(lbs.size()), ubs, + b.getMultiDimIdentityMap(ubs.size()), step); + } + *iv = ValueHandle(forOp.getInductionVar()); + auto *body = getForInductionVarOwner(iv->getValue()).getBody(); + result.enter(body, /*prev=*/1); + return result; +} + +LoopBuilder mlir::edsc::makeAffineLoopBuilder(OperationHandle *loop, + ValueHandle *iv, + ArrayRef lbHandles, + ArrayRef ubHandles, + int64_t step) { + mlir::edsc::LoopBuilder result; + AffineForOp forOp = emitStaticFor(lbHandles, ubHandles, step); + if (!forOp) { SmallVector lbs(lbHandles.begin(), lbHandles.end()); SmallVector ubs(ubHandles.begin(), ubHandles.end()); auto b = ScopedContext::getBuilder(); - *iv = ValueHandle( - b.create(ScopedContext::getLocation(), lbs, - b.getMultiDimIdentityMap(lbs.size()), ubs, - b.getMultiDimIdentityMap(ubs.size()), step) - .getInductionVar()); + forOp = b.create(ScopedContext::getLocation(), lbs, + b.getMultiDimIdentityMap(lbs.size()), ubs, + b.getMultiDimIdentityMap(ubs.size()), step); } + *iv = ValueHandle(forOp.getInductionVar()); + *loop = OperationHandle(forOp); auto *body = getForInductionVarOwner(iv->getValue()).getBody(); result.enter(body, 
/*prev=*/1); return result; } +mlir::edsc::AffineLoopNestBuilder::AffineLoopNestBuilder( + OperationHandle *loopHandle, ValueHandle *iv, ArrayRef lbs, + ArrayRef ubs, int64_t step) { + loops.emplace_back(makeAffineLoopBuilder(loopHandle, iv, lbs, ubs, step)); +} + mlir::edsc::AffineLoopNestBuilder::AffineLoopNestBuilder( ValueHandle *iv, ArrayRef lbs, ArrayRef ubs, int64_t step) { loops.emplace_back(makeAffineLoopBuilder(iv, lbs, ubs, step)); } +mlir::edsc::AffineLoopNestBuilder::AffineLoopNestBuilder( + ArrayRef loopHandles, ArrayRef ivs, + ArrayRef lbs, ArrayRef ubs, + ArrayRef steps) { + assert(loopHandles.size() == ivs.size() && "Mismatch in number of arguments"); + assert(ivs.size() == lbs.size() && "Mismatch in number of arguments"); + assert(ivs.size() == ubs.size() && "Mismatch in number of arguments"); + assert(ivs.size() == steps.size() && "Mismatch in number of arguments"); + for (auto it : llvm::zip(loopHandles, ivs, lbs, ubs, steps)) + loops.emplace_back(makeAffineLoopBuilder(std::get<0>(it), std::get<1>(it), + std::get<2>(it), std::get<3>(it), + std::get<4>(it))); +} + mlir::edsc::AffineLoopNestBuilder::AffineLoopNestBuilder( ArrayRef ivs, ArrayRef lbs, ArrayRef ubs, ArrayRef steps) { diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp --- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp @@ -36,6 +36,23 @@ enter(body, /*prev=*/1); } +mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(OperationHandle *loop, + ValueHandle *iv, + ValueHandle range) { + assert(range.getType() && "expected !linalg.range type"); + assert(range.getValue().getDefiningOp() && + "need operations to extract range parts"); + auto rangeOp = cast(range.getValue().getDefiningOp()); + auto lb = rangeOp.min(); + auto ub = rangeOp.max(); + auto step = rangeOp.step(); + auto forOp = OperationHandle::createOp(lb, ub, step); + *iv = ValueHandle(forOp.getInductionVar()); + *loop = OperationHandle(forOp); + auto *body = forOp.getBody(); + enter(body, /*prev=*/1); +} + mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(ValueHandle *iv, SubViewOp::Range range) { auto forOp = @@ -45,6 +62,17 @@ enter(body, /*prev=*/1); } +mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(OperationHandle *loop, + ValueHandle *iv, + SubViewOp::Range range) { + auto forOp = + OperationHandle::createOp(range.offset, range.size, range.stride); + *iv = ValueHandle(forOp.getInductionVar()); + *loop = OperationHandle(forOp); + auto *body = forOp.getBody(); + enter(body, /*prev=*/1); +} + ValueHandle mlir::edsc::LoopRangeBuilder::operator()(std::function fun) { if (fun) @@ -62,6 +90,17 @@ assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); } +mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( + ArrayRef loopHandles, ArrayRef ivs, + ArrayRef ranges) { + loops.reserve(ranges.size()); + for (unsigned i = 0, e = ranges.size(); i < e; ++i) { + loops.emplace_back(loopHandles[i], ivs[i], ranges[i]); + } + assert(loops.size() == loopHandles.size() && "Mismatch loop vs loopHandles"); + assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); +} + mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( ArrayRef ivs, ArrayRef ranges) { loops.reserve(ranges.size()); @@ -71,17 +110,36 @@ assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); } +mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( + ArrayRef loopHandles, ArrayRef ivs, + ArrayRef ranges) { + loops.reserve(ranges.size()); + for (unsigned i = 0, e = ranges.size(); i < 
e; ++i) {
+    loops.emplace_back(loopHandles[i], ivs[i], ranges[i]);
+  }
+  assert(loops.size() == loopHandles.size() &&
+         "Mismatch loops vs loopHandle size");
+  assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size");
+}
+
 mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( ArrayRef ivs, ArrayRef ranges) : LoopNestRangeBuilder( ivs, SmallVector(ranges.begin(), ranges.end())) {}
+mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder(
+    ArrayRef loopHandles, ArrayRef ivs,
+    ArrayRef ranges)
+    : LoopNestRangeBuilder(
+          loopHandles, ivs,
+          SmallVector(ranges.begin(), ranges.end())) {}
+
 ValueHandle LoopNestRangeBuilder::LoopNestRangeBuilder::operator()( std::function fun) {
 if (fun)
 fun();
-  for (auto &lit : reverse(loops)) {
-    lit({});
+  for (auto &lit : enumerate(reverse(loops))) {
+    lit.value()({});
 }
 return ValueHandle::null();
 }
@@ -91,13 +149,21 @@
 template <>
 GenericLoopNestRangeBuilder::GenericLoopNestRangeBuilder(
-    ArrayRef ivs, ArrayRef ranges) {
+    ArrayRef ivs, ArrayRef ranges) {
 builder = std::make_unique(ivs, ranges);
 }
+template <>
+GenericLoopNestRangeBuilder::GenericLoopNestRangeBuilder(
+    ArrayRef loopHandles, ArrayRef ivs,
+    ArrayRef ranges) {
+  builder = std::make_unique(loopHandles, ivs, ranges);
+}
+
 template <>
 GenericLoopNestRangeBuilder::GenericLoopNestRangeBuilder(
-    ArrayRef ivs, ArrayRef ranges) {
+    ArrayRef loopHandles, ArrayRef ivs,
+    ArrayRef ranges) {
 SmallVector lbs;
 SmallVector ubs;
 SmallVector steps;
@@ -109,7 +175,8 @@
 ubs.emplace_back(rangeOp.max());
 steps.emplace_back(rangeOp.step());
 }
-  builder = std::make_unique(ivs, lbs, ubs, steps);
+  builder = std::make_unique(loopHandles, ivs, lbs, ubs,
+                             steps);
 }
 template <>
@@ -127,6 +194,26 @@
 builder = std::make_unique(ivs, lbs, ubs, steps);
 }
+template <>
+GenericLoopNestRangeBuilder::GenericLoopNestRangeBuilder(
+    ArrayRef loopHandles, ArrayRef ivs,
+    ArrayRef ranges) {
+  assert(
+      loopHandles.size() == 1 &&
+      "expected loopHandles to be of unit size when lowering to loop.parallel");
+  SmallVector lbs, ubs, steps;
+  for (Value range : ranges) {
+    assert(range.getType() && "expected linalg.range type");
+    assert(range.getDefiningOp() && "need operations to extract range parts");
+    RangeOp rangeOp = cast(range.getDefiningOp());
+    lbs.emplace_back(rangeOp.min());
+    ubs.emplace_back(rangeOp.max());
+    steps.emplace_back(rangeOp.step());
+  }
+  builder = std::make_unique(loopHandles[0], ivs, lbs,
+                             ubs, steps);
+}
+
 } // namespace edsc
 } // namespace mlir
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp
@@ -534,26 +534,137 @@
 // consequence, (1) it is only allowed to emit new ops if the match is
 // guaranteed to be a success, (2) it is not allowed erase/replace, and (3) an
 // encompassing pattern must take care of the erasure logic.
-template
-class LinalgOpToLoopsImpl {
+template class LinalgOpToLoopsImpl {
 public:
-  static LogicalResult doit(Operation *op, PatternRewriter &rewriter);
+  static Optional doit(Operation *op, PatternRewriter &rewriter);
 };
-template
-bool loweringIsAllowed(int numParallelLoops, int numLoops) {
-  return true;
-}
-template <>
-bool loweringIsAllowed(int numParallelLoops, int numLoops) {
-  return numParallelLoops == numLoops;
-}
+namespace {
+/// Helper struct to generate the loop nest for the op. This is factored out
+/// here so it can be partially specialized for different LoopTy.
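+///
+/// A minimal sketch of the scheme (parameter names and template argument
+/// order are illustrative; bodies elided):
+///   template <typename LoopTy, typename ConcreteOpTy>
+///   class GenerateLoopNest {
+///     // Generic path: one loop.for / affine.for per loop dimension.
+///   };
+///   template <typename ConcreteOpTy>
+///   class GenerateLoopNest<loop::ParallelOp, ConcreteOpTy> {
+///     // Specialized path: outer "parallel" dimensions become a single
+///     // loop.parallel; the remaining dimensions are lowered to loop.for.
+///   };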
+template class GenerateLoopNest {
+public:
+  using IndexedValueTy =
+      typename std::conditional::value,
+                                AffineIndexedValue, StdIndexedValue>::type;
+  static Optional doit(ConcreteOpTy linalgOp,
+                       ArrayRef loopRanges,
+                       MutableArrayRef allIvs) {
+    auto nPar = linalgOp.getNumParallelLoops();
+    auto nRed = linalgOp.getNumReductionLoops();
+    auto nWin = linalgOp.getNumWindowLoops();
+    auto nLoops = nPar + nRed + nWin;
+    SmallVector allPIvs =
+        makeHandlePointers(MutableArrayRef(allIvs));
+
+    SmallVector allLoops(nLoops, OperationHandle());
+    SmallVector allPLoops;
+    allPLoops.reserve(allLoops.size());
+    for (OperationHandle &loop : allLoops)
+      allPLoops.push_back(&loop);
+    GenericLoopNestRangeBuilder(allPLoops, allPIvs, loopRanges)([&] {
+      SmallVector allIvValues(allIvs.begin(), allIvs.end());
+      LinalgScopedEmitter::emitScalarImplementation(allIvValues,
+                                                    linalgOp);
+    });
+    // Number of loop ops might be different from the number of ivs since some
+    // loops have multiple ivs.
+    LinalgLoops loops;
+    for (OperationHandle loop : allLoops) {
+      if (Operation *op = loop.getOperation())
+        loops.push_back(op);
+    }
+    return loops;
+  }
+};
-template
-LogicalResult LinalgOpToLoopsImpl::doit(
-    Operation *op, PatternRewriter &rewriter) {
-  OpBuilder b(op);
-  ScopedContext scope(b, op->getLoc());
+/// Generates a loop nest using loop.parallel; loop.parallel is used only for
+/// the outer parallel loops. All other loops are generated using loop.for
+/// operations.
+template
+class GenerateLoopNest {
+public:
+  using IndexedValueTy = StdIndexedValue;
+
+  static Optional doit(ConcreteOpTy linalgOp,
+                       ArrayRef loopRanges,
+                       MutableArrayRef allIvs) {
+    // Only generate loop.parallel for outer consecutive "parallel"
+    // iterator_types.
+    // TODO(ravishankarm): Generate loop.parallel for all "parallel" iterator
+    // types.
+    auto nPar = linalgOp.getNumParallelLoops();
+    auto nRed = linalgOp.getNumReductionLoops();
+    auto nWin = linalgOp.getNumWindowLoops();
+    auto nLoops = nPar + nRed + nWin;
+    auto nOuterPar = linalgOp.iterator_types()
+                         .getValue()
+                         .take_while([](Attribute attr) {
+                           return attr.cast().getValue() ==
+                                  getParallelIteratorTypeName();
+                         })
+                         .size();
+    // If there are no outer parallel loops, then the number of loop ops is
+    // the same as the number of loops, and they are all loop.for ops.
+    auto nLoopOps = (nOuterPar ? nLoops - nOuterPar + 1 : nLoops);
+    SmallVector allPIvs =
+        makeHandlePointers(MutableArrayRef(allIvs));
+
+    SmallVector allLoops(nLoopOps, OperationHandle());
+    SmallVector allPLoops;
+    allPLoops.reserve(allLoops.size());
+    for (OperationHandle &loop : allLoops)
+      allPLoops.push_back(&loop);
+
+    ArrayRef allPIvsRef(allPIvs);
+    ArrayRef allPLoopsRef(allPLoops);
+
+    if (nOuterPar) {
+      GenericLoopNestRangeBuilder(
+          allPLoopsRef[0], allPIvsRef.take_front(nOuterPar),
+          loopRanges.take_front(nOuterPar))([&] {
+        GenericLoopNestRangeBuilder(
+            allPLoopsRef.drop_front(1), allPIvsRef.drop_front(nOuterPar),
+            loopRanges.drop_front(nOuterPar))([&] {
+          SmallVector allIvValues(allIvs.begin(), allIvs.end());
+          LinalgScopedEmitter::
+              emitScalarImplementation(allIvValues, linalgOp);
+        });
+      });
+    } else {
+      // If there are no parallel loops then fall back to generating all
+      // loop.for operations.
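+      // Illustrative example: with iterator_types ["reduction", "parallel"]
+      // there is no leading "parallel" iterator, so this branch emits a
+      // loop.for for every dimension; ["parallel", "parallel", "reduction"]
+      // would instead take the branch above and produce one loop.parallel
+      // over the two outer dimensions wrapping a loop.for over the
+      // reduction dimension.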
+ GenericLoopNestRangeBuilder(allPLoopsRef, allPIvsRef, + loopRanges)([&] { + SmallVector allIvValues(allIvs.begin(), allIvs.end()); + LinalgScopedEmitter::emitScalarImplementation(allIvValues, + linalgOp); + }); + } + // Number of loop ops might be different from the number of ivs since some + // loops have multiple ivs. + LinalgLoops loops; + for (OperationHandle loop : allLoops) { + if (Operation *op = loop.getOperation()) + loops.push_back(op); + } + return loops; + } +}; +} // namespace + +template +Optional +LinalgOpToLoopsImpl::doit(Operation *op, + PatternRewriter &rewriter) { + using Impl = GenerateLoopNest; + using IndexedValueTy = + typename GenerateLoopNest::IndexedValueTy; + + // using IndexedValueTy = typename Impl::IndexedValueTy; + ScopedContext scope(rewriter, op->getLoc()); // The flattened loopToOperandRangesMaps is expected to be an invertible // permutation map (which is asserted in the inverse calculation). @@ -564,8 +675,6 @@ auto nRed = linalgOp.getNumReductionLoops(); auto nWin = linalgOp.getNumWindowLoops(); auto nLoops = nPar + nRed + nWin; - if (!loweringIsAllowed(nPar, nLoops)) - return failure(); auto mapsRange = linalgOp.indexing_maps().template getAsRange(); auto maps = @@ -574,25 +683,19 @@ if (!invertedMap) { LinalgScopedEmitter::emitScalarImplementation( {}, linalgOp); - return success(); + return LinalgLoops(); } - SmallVector allIvs(nLoops, ValueHandle(b.getIndexType())); - SmallVector allPIvs = - makeHandlePointers(MutableArrayRef(allIvs)); - auto loopRanges = emitLoopRanges(scope.getBuilder(), scope.getLocation(), - invertedMap, getViewSizes(b, linalgOp)); + SmallVector allIvs(nLoops, + ValueHandle(rewriter.getIndexType())); + auto loopRanges = + emitLoopRanges(scope.getBuilder(), scope.getLocation(), invertedMap, + getViewSizes(rewriter, linalgOp)); assert(loopRanges.size() == allIvs.size()); - - GenericLoopNestRangeBuilder(allPIvs, loopRanges)([&] { - SmallVector allIvValues(allIvs.begin(), allIvs.end()); - LinalgScopedEmitter::emitScalarImplementation( - allIvValues, linalgOp); - }); - return success(); + return Impl::doit(linalgOp, loopRanges, allIvs); } -template +template class LinalgRewritePattern : public RewritePattern { public: explicit LinalgRewritePattern(MLIRContext *context) @@ -600,8 +703,9 @@ LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - using Impl = LinalgOpToLoopsImpl; - if (failed(Impl::doit(op, rewriter))) + using Impl = LinalgOpToLoopsImpl; + Optional loops = Impl::doit(op, rewriter); + if (!loops) return failure(); rewriter.eraseOp(op); return success(); @@ -609,32 +713,26 @@ }; // Helper classes for type list expansion. -template -class RewritePatternList; +template class RewritePatternList; -template -class RewritePatternList { +template class RewritePatternList { public: static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) {} }; -template -class RewritePatternList { +template +class RewritePatternList { public: static void build(OwningRewritePatternList &patterns, MLIRContext *ctx) { - patterns - .insert>( - ctx); - RewritePatternList::build( - patterns, ctx); + patterns.insert>(ctx); + RewritePatternList::build(patterns, ctx); } }; /// Populate the given list with patterns that convert from Linalg to LLVM. 
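+/// (A note on the recursion above: RewritePatternList peels one concrete op
+/// type off the type list per step; schematically, with illustrative
+/// parameter names, RewritePatternList<LoopTy, A, B>::build inserts
+/// LinalgRewritePattern<LoopTy, A> and then recurses as
+/// RewritePatternList<LoopTy, B>::build.)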
-template +template void FillRewritePatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) { - RewritePatternList::build(patterns, ctx); @@ -678,13 +776,13 @@ }; } // namespace -template +template static void lowerLinalgToLoopsImpl(Operation *op, MLIRContext *context) { OwningRewritePatternList patterns; // Canonicalization and folding patterns applied greedily allow cleaning up // the emitted IR on the fly. // TODO(ntv) fold view and subview ops? - FillRewritePatterns(patterns, context); + FillRewritePatterns(patterns, context); DimOp::getCanonicalizationPatterns(patterns, context); AffineApplyOp::getCanonicalizationPatterns(patterns, context); patterns.insert(context); @@ -699,8 +797,7 @@ #include "mlir/Dialect/Linalg/Passes.h.inc" void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; struct LowerToLoops : public FunctionPass { @@ -709,8 +806,7 @@ #include "mlir/Dialect/Linalg/Passes.h.inc" void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; struct LowerToParallelLoops : public FunctionPass { @@ -719,8 +815,7 @@ #include "mlir/Dialect/Linalg/Passes.h.inc" void runOnFunction() override { - lowerLinalgToLoopsImpl(getFunction(), - &getContext()); + lowerLinalgToLoopsImpl(getFunction(), &getContext()); } }; } // namespace @@ -739,28 +834,38 @@ return std::make_unique(); } +/// Emits a loop nest with the proper body for `op`. +template +Optional +mlir::linalg::linalgLowerOpToLoops(PatternRewriter &rewriter, ConcreteOp op) { + return LinalgOpToLoopsImpl::doit(op, rewriter); +} + /// Emits a loop nest of `loop.for` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit( - op, rewriter); + Optional loops = + LinalgOpToLoopsImpl::doit(op, rewriter); + return loops ? success() : failure(); } /// Emits a loop nest of `affine.for` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToAffineLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit( - op, rewriter); + Optional loops = + LinalgOpToLoopsImpl::doit(op, rewriter); + return loops ? success() : failure(); } /// Emits a loop nest of `loop.parallel` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit(op, rewriter); + Optional loops = + LinalgOpToLoopsImpl::doit(op, rewriter); + return loops ? success() : failure(); } // TODO(ntv) Need to make these instantiations more future-proof to avoid the @@ -769,6 +874,8 @@ template LogicalResult mlir::linalg::linalgOpToLoops( \ PatternRewriter & rewriter, Operation * op); \ template LogicalResult mlir::linalg::linalgOpToAffineLoops( \ + PatternRewriter & rewriter, Operation * op); \ + template LogicalResult mlir::linalg::linalgOpToParallelLoops( \ PatternRewriter & rewriter, Operation * op); INSTANTIATE_LINALG_OP_TO_LOOPS(CopyOp) @@ -782,9 +889,3 @@ INSTANTIATE_LINALG_OP_TO_LOOPS(PoolingSumOp) INSTANTIATE_LINALG_OP_TO_LOOPS(GenericOp) INSTANTIATE_LINALG_OP_TO_LOOPS(IndexedGenericOp) - -// TODO(pifon): Enable lowering to parallel loops for ops other than -// linalg.generic for now to be on the safe side. 
-template LogicalResult -mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, - Operation *op); diff --git a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp --- a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp @@ -23,6 +23,17 @@ loops.emplace_back(makeParallelLoopBuilder(ivs, lbs, ubs, steps)); } +mlir::edsc::ParallelLoopNestBuilder::ParallelLoopNestBuilder( + OperationHandle *loop, ArrayRef ivs, + ArrayRef lbs, ArrayRef ubs, + ArrayRef steps) { + assert(ivs.size() == lbs.size() && "Mismatch in number of arguments"); + assert(ivs.size() == ubs.size() && "Mismatch in number of arguments"); + assert(ivs.size() == steps.size() && "Mismatch in number of arguments"); + + loops.emplace_back(makeParallelLoopBuilder(loop, ivs, lbs, ubs, steps)); +} + void mlir::edsc::ParallelLoopNestBuilder::operator()( function_ref fun) { if (fun) @@ -52,6 +63,26 @@ assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); } +mlir::edsc::LoopNestBuilder::LoopNestBuilder(ArrayRef loop, + ArrayRef ivs, + ArrayRef lbs, + ArrayRef ubs, + ArrayRef steps) { + assert(ivs.size() == loop.size() && + "expected size of ivs and loops to match"); + assert(ivs.size() == lbs.size() && "expected size of ivs and lbs to match"); + assert(ivs.size() == ubs.size() && "expected size of ivs and ubs to match"); + assert(ivs.size() == steps.size() && + "expected size of ivs and steps to match"); + loops.reserve(ivs.size()); + for (auto it : llvm::zip(loop, ivs, lbs, ubs, steps)) { + loops.emplace_back(makeLoopBuilder(std::get<0>(it), std::get<1>(it), + std::get<2>(it), std::get<3>(it), + std::get<4>(it))); + } + assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); +} + void mlir::edsc::LoopNestBuilder::LoopNestBuilder::operator()( std::function fun) { if (fun) @@ -60,7 +91,8 @@ lit({}); } -LoopBuilder mlir::edsc::makeParallelLoopBuilder(ArrayRef ivs, +LoopBuilder mlir::edsc::makeParallelLoopBuilder(OperationHandle *loop, + ArrayRef ivs, ArrayRef lbHandles, ArrayRef ubHandles, ArrayRef steps) { @@ -74,11 +106,22 @@ cast(*opHandle.getOperation()); for (size_t i = 0, e = ivs.size(); i < e; ++i) *ivs[i] = ValueHandle(parallelOp.getBody()->getArgument(i)); + *loop = OperationHandle(parallelOp); result.enter(parallelOp.getBody(), /*prev=*/1); return result; } -mlir::edsc::LoopBuilder mlir::edsc::makeLoopBuilder(ValueHandle *iv, +LoopBuilder mlir::edsc::makeParallelLoopBuilder(ArrayRef ivs, + ArrayRef lbHandles, + ArrayRef ubHandles, + ArrayRef steps) { + OperationHandle loop; + return mlir::edsc::makeParallelLoopBuilder(&loop, ivs, lbHandles, ubHandles, + steps); +} + +mlir::edsc::LoopBuilder mlir::edsc::makeLoopBuilder(OperationHandle *loop, + ValueHandle *iv, ValueHandle lbHandle, ValueHandle ubHandle, ValueHandle stepHandle) { @@ -86,7 +129,16 @@ auto forOp = OperationHandle::createOp(lbHandle, ubHandle, stepHandle); *iv = ValueHandle(forOp.getInductionVar()); + *loop = OperationHandle(forOp); auto *body = loop::getForInductionVarOwner(iv->getValue()).getBody(); result.enter(body, /*prev=*/1); return result; } + +mlir::edsc::LoopBuilder mlir::edsc::makeLoopBuilder(ValueHandle *iv, + ValueHandle lbHandle, + ValueHandle ubHandle, + ValueHandle stepHandle) { + OperationHandle loop; + return mlir::edsc::makeLoopBuilder(&loop, iv, lbHandle, ubHandle, stepHandle); +} diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ 
b/mlir/test/Dialect/Linalg/loops.mlir @@ -1,18 +1,30 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s +// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck --check-prefix=CHECKLOOP %s +// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s // Test that we can lower all the way to LLVM without crashing, don't check results here. // RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1 -// CHECK-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECK-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> -// CHECK-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> +// CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECKLOOP-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECKLOOP-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> +// CHECKLOOP-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> -// CHECK-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> -// CHECK-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> -// CHECK-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECKLOOP-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECKLOOP-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> +// CHECKLOOP-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> + +// CHECKPARALLEL-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECKPARALLEL-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECKPARALLEL-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECKPARALLEL-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> +// CHECKPARALLEL-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> + +// CHECKPARALLEL-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECKPARALLEL-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECKPARALLEL-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> +// CHECKPARALLEL-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { @@ -24,22 +36,40 @@ linalg.matmul(%A, %B, %C) : memref, memref, memref return } -// CHECK-LABEL: func @matmul(%{{.*}}: memref, -// CHECK-SAME: [[M:arg[0-9]+]]: index -// CHECK-SAME: [[N:arg[0-9]+]]: index -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = 
%{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @matmul(%{{.*}}: memref, +// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[N:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref + + func @matvec(%arg0: memref, %M: index, %N: index) { %c0 = constant 0 : index @@ -50,20 +80,36 @@ linalg.matvec(%2, %3, %4) : memref, memref, memref return } -// CHECK-LABEL: func @matvec(%{{.*}}: memref, -// CHECK-SAME: [[M:arg[0-9]+]]: index -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref +// CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref, +// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view 
%{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref + func @dot(%arg0: memref, %M: index) { %c0 = constant 0 : index @@ -74,78 +120,126 @@ linalg.dot(%1, %2, %3) : memref, memref, memref return } -// CHECK-LABEL: func @dot(%{{.*}}: memref, -// CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %[[C]][] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %[[C]][] : memref +// CHECKLOOP-LABEL: func @dot(%{{.*}}: memref, +// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index +// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[C]][] : memref + +// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref, +// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index +// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step 
%{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[C]][] : memref + func @dot_view(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.dot(%arg0, %arg1, %arg2) : memref, memref, memref return } -// CHECK-LABEL: func @dot_view( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[K:.*]] = dim %arg0, 0 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = load %{{.*}}[] : memref -// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: store %[[res]], %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @dot_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[K:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @dot_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref func @fill_view(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view( -// CHECK: %{{.*}}: memref, %{{.*}}: f32) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-LABEL: func @fill_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @fill_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref func @fill_view0(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { -// CHECK: store %{{.*}}, %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @fill_view3(%arg0: memref, %arg1: f32) { linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECK-LABEL: func @fill_view3( -// CHECK: 
%{{.*}}: memref, %{{.*}}: f32) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @fill_view3( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @fill_view3( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @copy_view(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECK-LABEL: func @copy_view( -// CHECK: %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-LABEL: func @copy_view( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @copy_view( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref func @copy_view0(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECK-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %{{.*}} = load %{{.*}}[] : memref -// CHECK: store %{{.*}}, %{{.*}}[] : memref +// CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %{{.*}} = load %{{.*}}[] : memref +// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref + +// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[] : memref +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @copy_view3(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, @@ -153,66 +247,113 @@ memref, memref return } -// CHECK-LABEL: func @copy_view3 -// CHECK: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @copy_view3 +// CHECKLOOP: (%{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, 
%{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @copy_view3 +// CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref, memref, memref return } -// CHECK-LABEL: func @conv_view3( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 1 : memref -// CHECK: %[[K:.*]] = dim %arg0, 2 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECK: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_view3( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_view3( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: 
%[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view4(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref, memref, memref return } -// CHECK-LABEL: func @conv_view4( -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECK: %[[K:.*]] = dim %arg0, 3 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECK: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECK: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_view4( +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 
+// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_view4( +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + func @conv_padding(%arg0: memref, %arg1: memref, @@ -223,34 +364,60 @@ memref, memref, memref return } -// CHECK-LABEL: func @conv_padding -// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECK: %[[K:.*]] = dim %arg0, 3 : memref -// CHECK: %[[B:.*]] = dim %arg1, 0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECK: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) -// CHECK: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @conv_padding +// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// 
CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) +// CHECKLOOP: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @conv_padding +// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECKPARALLEL: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref +// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref func @pooling_max(%arg0: memref, %arg1: memref, @@ -259,21 +426,36 @@ memref, memref, memref return } -// CHECK-LABEL: func @pooling_max -// CHECK: %[[WX:.*]] = dim %arg1, 0 : 
memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECK: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_max +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_max +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @pooling_min(%arg0: memref, %arg1: memref, @@ -282,21 +464,36 @@ memref, memref, memref return } -// CHECK-LABEL: func @pooling_min -// CHECK: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = load %{{.*}}[%[[IX]], 
%[[IY]]] : memref -// CHECK: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_min +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_min +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @pooling_sum(%arg0: memref, %arg1: memref, @@ -305,21 +502,36 @@ memref, memref, memref return } -// CHECK-LABEL: func @pooling_sum -// CHECK: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECK: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECK: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECK: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECK: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECK: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECK: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 -// CHECK: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-LABEL: func @pooling_sum +// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// 
CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 +// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref + +// CHECKPARALLEL-LABEL: func @pooling_sum +// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref +// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref +// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref +// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref +// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { +// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 +// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref func @foo(%0: f32, %1: f32, %2: f32) -> (f32, f32) { %f0 = constant 0.0 : f32 @@ -344,17 +556,27 @@ memref, memref, memref return } -// CHECK-LABEL: @foo -// CHECK-LABEL: @generic_function -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) -// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @foo +// CHECKLOOP-LABEL: @generic_function +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) +// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @foo +// CHECKPARALLEL-LABEL: @generic_function +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) +// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref #trait2 = { args_in = 1, @@ -373,17 +595,27 @@ }: memref, memref, memref return } -// CHECK-LABEL: @generic_region 
-// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32 -// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @generic_region +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32 +// CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @generic_region +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL: %[[e:.*]] = addf %[[c]], %[[d]] : f32 +// CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref func @indexed_foo(%i: index, %j: index, %k: index, %0: f32, %1: f32, %2: f32) -> (f32, f32) { %i_int = index_cast %i: index to i32 @@ -409,17 +641,27 @@ memref return } -// CHECK-LABEL: @indexed_foo -// CHECK-LABEL: @indexed_generic_function -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECK: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) -// CHECK: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECK: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP-LABEL: @indexed_foo +// CHECKLOOP-LABEL: @indexed_generic_function +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) +// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref + +// CHECKPARALLEL-LABEL: @indexed_foo +// CHECKPARALLEL-LABEL: @indexed_generic_function +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// 
CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) +// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref #trait4 = { args_in = 1, @@ -450,21 +692,35 @@ return } -// CHECK-LABEL: @indexed_generic_region -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: loop.for %[[k:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECK: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECK: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] -// CHECK: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index -// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 -// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 -// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 -// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKLOOP-LABEL: @indexed_generic_region +// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} +// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} +// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] +// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index +// CHECKLOOP: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 +// CHECKLOOP: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 +// CHECKLOOP: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 +// CHECKLOOP: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] + +// CHECKPARALLEL-LABEL: @indexed_generic_region +// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) +// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] +// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKPARALLEL: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index +// CHECKPARALLEL: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index +// CHECKPARALLEL: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 +// CHECKPARALLEL: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 +// CHECKPARALLEL: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 +// CHECKPARALLEL: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKPARALLEL: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] // ----- @@ -490,13 +746,20 @@ return } -// CHECK-LABEL: @generic_op_zero_rank -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> -// CHECK: loop.for %[[i:.*]] = {{.*}} -// CHECK: loop.for %[[j:.*]] = {{.*}} -// CHECK: %[[a:.*]] = load %[[ARG0]][] -// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] +// CHECKLOOP-LABEL: 
@generic_op_zero_rank
+// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][]
+// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
+
+// CHECKPARALLEL-LABEL: @generic_op_zero_rank
+// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
+// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][]
+// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]

func @indexed_generic_op_zero_rank(%arg0: memref<i32>,
%arg1: memref<3x4xi32>) {
@@ -510,16 +773,26 @@
return
}
-// CHECK-LABEL: @indexed_generic_op_zero_rank
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-// CHECK: loop.for %[[i:.*]] = {{.*}}
-// CHECK: loop.for %[[j:.*]] = {{.*}}
-// CHECK: %[[a:.*]] = load %[[ARG0]][
-// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
-// CHECK: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
-// CHECK: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
-// CHECK: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
+// CHECKLOOP-LABEL: @indexed_generic_op_zero_rank
+// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
+// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][
+// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
+// CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
+// CHECKLOOP: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
+
+// CHECKPARALLEL-LABEL: @indexed_generic_op_zero_rank
+// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
+// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
+// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][
+// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
+// CHECKPARALLEL: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
+// CHECKPARALLEL: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]

#reduce_1D_access = [
affine_map<(i) -> (i)>,
@@ -543,14 +816,23 @@
} : memref<?xf32>, memref<f32>
return
}
-// CHECK-LABEL: @generic_op_1D_reduce
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECK: loop.for %[[i:.*]] = {{.*}}
-// CHECK: %[[a:.*]] = load %[[ARG0]][%[[i]]]
-// CHECK: %[[b:.*]] = load %[[ARG1]][]
-// CHECK: %[[c:.*]] = addf %[[a]], %[[b]] : f32
-// CHECK: store %[[c]], %[[ARG1]][]
+// CHECKLOOP-LABEL: @generic_op_1D_reduce
+// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]]
+// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][]
+// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32
+// CHECKLOOP: store %[[c]], %[[ARG1]][]
+
+// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
+// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]]
+// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][]
+// CHECKPARALLEL: %[[c:.*]] = addf 
%[[a]], %[[b]] : f32
+// CHECKPARALLEL: store %[[c]], %[[ARG1]][]


#reduce_init_1D_access = [
@@ -581,17 +863,29 @@
} : memref<?xf32>, memref<f32>, memref<f32>
return
}
-// CHECK-LABEL: @indexed_generic_op_1D_reduce
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
-// CHECK: loop.for %[[i:.*]] = {{.*}}
-// CHECK: %[[a:.*]] = load %[[ARG0]][%[[i]]]
-// CHECK: %[[b:.*]] = load %[[ARG1]][]
-// CHECK: %[[c:.*]] = load %[[ARG2]][]
-// CHECK: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
-// CHECK: %[[e:.*]] = addf %[[a]], %[[d]]
-// CHECK: store %[[e]], %[[ARG2]][]
+// CHECKLOOP-LABEL: @indexed_generic_op_1D_reduce
+// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]]
+// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][]
+// CHECKLOOP: %[[c:.*]] = load %[[ARG2]][]
+// CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
+// CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]]
+// CHECKLOOP: store %[[e]], %[[ARG2]][]
+
+// CHECKPARALLEL-LABEL: @indexed_generic_op_1D_reduce
+// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
+// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
+// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]]
+// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][]
+// CHECKPARALLEL: %[[c:.*]] = load %[[ARG2]][]
+// CHECKPARALLEL: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
+// CHECKPARALLEL: %[[e:.*]] = addf %[[a]], %[[d]]
+// CHECKPARALLEL: store %[[e]], %[[ARG2]][]

#trait_const_fill = {
args_in = 0,
@@ -601,15 +895,21 @@
library_call = "some_external_fn"
}
func @generic_const_init(%arg0: memref<?xf32>) {
-  %cst = constant 1.0 : f32
+  %cst = constant 1.0 : f32
linalg.generic #trait_const_fill %arg0 {
^bb0(%arg1: f32): // no predecessors
linalg.yield %cst : f32
}: memref<?xf32>
return
}
-// CHECK-LABEL: @generic_const_init
-// CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
-// CHECK: %[[CONST:.*]] = constant 1.000000e+00 : f32
-// CHECK: loop.for %[[i:.*]] = {{.*}}
-// CHECK: store %[[CONST]], %[[ARG0]]
+// CHECKLOOP-LABEL: @generic_const_init
+// CHECKLOOP-SAME: %[[ARG0:.*]]: memref<?xf32>
+// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32
+// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: store %[[CONST]], %[[ARG0]]
+
+// CHECKPARALLEL-LABEL: @generic_const_init
+// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32>
+// CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32
+// CHECKPARALLEL: loop.parallel (%[[i:.*]])
+// CHECKPARALLEL: store %[[CONST]], %[[ARG0]]
diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel_loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir
@@ -32,22 +32,32 @@
// -----
#accesses = [
- affine_map<(m, n) -> (m, n)>,
- affine_map<(m, n) -> (m)>
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
+ affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
]
#trait = {
args_in = 1,
args_out = 1,
- iterator_types = ["parallel", "reduction"],
+ iterator_types = ["parallel", "parallel", "reduction", "parallel"],
indexing_maps = #accesses
}
-func @do_not_lower_reduce(%A: memref<2x4xf32>, %B: memref<2xf32>) {
+func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
linalg.generic #trait %A, %B {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
- } : 
memref<2x4xf32>, memref<2xf32>
+ } : memref<?x?x?x?xf32>, memref<?x?x?xf32>
return
}
-// CHECK-LABEL: @do_not_lower_reduce
-// CHECK: linalg.generic
+// CHECK-LABEL: @lower_outer_parallel
+// CHECK-DAG: %[[C0:.*]] = constant 0
+// CHECK-DAG: %[[C1:.*]] = constant 1
+// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, 0
+// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1
+// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2
+// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3
+// CHECK: loop.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
+// CHECK: loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
+// CHECK: loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
+// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
+// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]
\ No newline at end of file
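
For readers skimming the test expectations above, here is a minimal sketch of the two lowerings that the CHECKLOOP and CHECKPARALLEL prefixes exercise. The function below and its shapes are hypothetical, and the pass flags are assumptions based on the usual RUN lines for these tests (-convert-linalg-to-loops and -convert-linalg-to-parallel-loops), which fall outside the hunks shown:

// Hypothetical input: a rank-2 copy whose iterators are all parallel.
func @copy2d(%in: memref<?x?xf32>, %out: memref<?x?xf32>) {
  linalg.copy(%in, %out) : memref<?x?xf32>, memref<?x?xf32>
  return
}

// Under -convert-linalg-to-loops (the CHECKLOOP expectations): one
// loop.for per dimension, with a scalar load/store in the body.
//   loop.for %i = %c0 to %d0 step %c1 {
//     loop.for %j = %c0 to %d1 step %c1 {
//       %v = load %in[%i, %j] : memref<?x?xf32>
//       store %v, %out[%i, %j] : memref<?x?xf32>
//     }
//   }

// Under -convert-linalg-to-parallel-loops (the CHECKPARALLEL
// expectations): the consecutive outer parallel dimensions collapse
// into a single loop.parallel; a reduction dimension (none here) and
// anything after it stay as nested loop.for, as in @conv_view3 and
// @lower_outer_parallel above.
//   loop.parallel (%i, %j) = (%c0, %c0) to (%d0, %d1) step (%c1, %c1) {
//     %v = load %in[%i, %j] : memref<?x?xf32>
//     store %v, %out[%i, %j] : memref<?x?xf32>
//   }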