diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
--- a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h
@@ -89,132 +89,6 @@
 } // namespace op
 
-/// Arithmetic operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator+(Value e) {
-  using op::operator+;
-  return static_cast<Value>(*this) + e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator-(Value e) {
-  using op::operator-;
-  return static_cast<Value>(*this) - e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator*(Value e) {
-  using op::operator*;
-  return static_cast<Value>(*this) * e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator/(Value e) {
-  using op::operator/;
-  return static_cast<Value>(*this) / e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator%(Value e) {
-  using op::operator%;
-  return static_cast<Value>(*this) % e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator^(Value e) {
-  using op::operator^;
-  return static_cast<Value>(*this) ^ e;
-}
-
-/// Assignment-arithmetic operator overloadings.
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) {
-  using op::operator+;
-  return Store(*this + e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) {
-  using op::operator-;
-  return Store(*this - e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) {
-  using op::operator*;
-  return Store(*this * e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) {
-  using op::operator/;
-  return Store(*this / e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) {
-  using op::operator%;
-  return Store(*this % e, getBase(), indices);
-}
-template <typename Load, typename Store>
-Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) {
-  using op::operator^;
-  return Store(*this ^ e, getBase(), indices);
-}
-
-/// Logical operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator&&(Value e) {
-  using op::operator&&;
-  return static_cast<Value>(*this) && e;
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::operator||(Value e) {
-  using op::operator||;
-  return static_cast<Value>(*this) || e;
-}
-
-/// Comparison operator overloadings.
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::eq(Value e) {
-  return eq(value, e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ne(Value e) {
-  return ne(value, e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::slt(Value e) {
-  using op::slt;
-  return slt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sle(Value e) {
-  using op::sle;
-  return sle(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sgt(Value e) {
-  using op::sgt;
-  return sgt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::sge(Value e) {
-  using op::sge;
-  return sge(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ult(Value e) {
-  using op::ult;
-  return ult(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ule(Value e) {
-  using op::ule;
-  return ule(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::ugt(Value e) {
-  using op::ugt;
-  return ugt(static_cast<Value>(*this), e);
-}
-template <typename Load, typename Store>
-Value TemplatedIndexedValue<Load, Store>::uge(Value e) {
-  using op::uge;
-  return uge(static_cast<Value>(*this), e);
-}
-
 } // namespace edsc
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
--- a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h
@@ -21,9 +21,6 @@
 using affine_max = ValueBuilder<AffineMaxOp>;
 using affine_store = OperationBuilder<AffineStoreOp>;
 
-/// Provide an index notation around affine_load and affine_store.
-using AffineIndexedValue = TemplatedIndexedValue<intrinsics::affine_load,
-                                                 intrinsics::affine_store>;
-
 } // namespace intrinsics
 } // namespace edsc
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -244,19 +244,15 @@
 /// Utility class used to generate nested loops with ranges described by
 /// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
 /// is used to generate the body of the innermost loop. It is passed a range
-/// of loop induction variables.
+/// of loop induction variables and a range of iterArgs.
 template <typename LoopTy>
 struct GenerateLoopNest {
-  using IndexedValueTy =
-      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
-                                edsc::intrinsics::AffineIndexedValue,
-                                edsc::intrinsics::MemRefIndexedValue>::type;
-
-  static void
-  doit(ArrayRef<Range> loopRanges, LinalgOp linalgOp,
-       ArrayRef<Attribute> iteratorTypes,
-       function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
-       Optional<LinalgLoopDistributionOptions> = None);
+  static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
+                   LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
+                   function_ref<scf::ValueVector(OpBuilder &, Location,
+                                                 ValueRange, ValueRange)>
+                       bodyBuilderFn,
+                   Optional<LinalgLoopDistributionOptions> = None);
 };
 
 } // namespace linalg
diff --git a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
--- a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
@@ -31,9 +31,6 @@
 using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
 using memref_view = ValueBuilder<memref::ViewOp>;
 
-/// Provide an index notation around memref_load and memref_store.
-using MemRefIndexedValue =
-    TemplatedIndexedValue<intrinsics::memref_load, intrinsics::memref_store>;
 
 } // namespace intrinsics
 } // namespace edsc
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
deleted file mode 100644
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- Builders.h - MLIR Declarative Builder Classes ------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Provides intuitive composable interfaces for building structured MLIR
-// snippets in a declarative fashion.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
-#define MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
-
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/EDSC/Builders.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Types.h"
-
-namespace mlir {
-namespace edsc {
-
-/// Adapters for building loop nests using the builder and the location stored
-/// in ScopedContext. Actual builders are in scf::buildLoopNest.
-scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
-                              ValueRange steps,
-                              function_ref<void(ValueRange)> fun = nullptr);
-scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
-                              function_ref<void(Value)> fun = nullptr);
-scf::LoopNest loopNestBuilder(
-    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
-scf::LoopNest loopNestBuilder(
-    ValueRange lbs, ValueRange ubs, ValueRange steps,
-    ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);
-
-/// Adapters for building if conditions using the builder and the location
-/// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted
-/// if the condition should not have an 'else' part.
-/// When `ifOp` is specified, the scf::IfOp is captured. This is particularly
-/// convenient for 0-result conditions.
-ValueRange conditionBuilder(TypeRange results, Value condition,
-                            function_ref<scf::ValueVector()> thenBody,
-                            function_ref<scf::ValueVector()> elseBody = nullptr,
-                            scf::IfOp *ifOp = nullptr);
-ValueRange conditionBuilder(Value condition, function_ref<void()> thenBody,
-                            function_ref<void()> elseBody = nullptr,
-                            scf::IfOp *ifOp = nullptr);
-
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h
deleted file mode 100644
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Intrinsics.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- Intrinsics.h - MLIR EDSC Intrinsics for SCF --------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM
-// Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
-#define MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-
-namespace mlir {
-namespace edsc {
-namespace intrinsics {
-
-using loop_yield = OperationBuilder<scf::YieldOp>;
-
-} // namespace intrinsics
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h
--- a/mlir/include/mlir/EDSC/Builders.h
+++ b/mlir/include/mlir/EDSC/Builders.h
@@ -169,172 +169,6 @@
   SmallVector<AffineExpr, 8> exprs;
 };
 
-/// A TemplatedIndexedValue brings an index notation over the template Load and
-/// Store parameters. Assigning to an IndexedValue emits an actual `Store`
-/// operation, while converting an IndexedValue to a Value emits an actual
-/// `Load` operation.
-template <typename Load, typename Store>
-class TemplatedIndexedValue {
-public:
-  explicit TemplatedIndexedValue(Value v) : value(v) {}
-
-  TemplatedIndexedValue(const TemplatedIndexedValue &rhs) = default;
-
-  TemplatedIndexedValue operator()() { return *this; }
-  /// Returns a new `TemplatedIndexedValue`.
-  TemplatedIndexedValue operator()(Value index) {
-    TemplatedIndexedValue res(value);
-    res.indices.push_back(index);
-    return res;
-  }
-  template <typename... Args>
-  TemplatedIndexedValue operator()(Value index, Args... indices) {
-    return TemplatedIndexedValue(value, index).append(indices...);
-  }
-  TemplatedIndexedValue operator()(ValueRange indices) {
-    return TemplatedIndexedValue(value, indices);
-  }
-
-  /// Emits a `store`.
-  Store operator=(const TemplatedIndexedValue &rhs) {
-    return Store(rhs, value, indices);
-  }
-  Store operator=(Value rhs) { return Store(rhs, value, indices); }
-
-  /// Emits a `load` when converting to a Value.
-  operator Value() const { return Load(value, indices); }
-
-  /// Returns the base memref.
-  Value getBase() const { return value; }
-
-  /// Returns the underlying memref.
-  MemRefType getMemRefType() const {
-    return value.getType().template cast<MemRefType>();
-  }
-
-  /// Returns the underlying MemRef elemental type cast as `T`.
-  template <typename T>
-  T getElementalTypeAs() const {
-    return value.getType()
-        .template cast<MemRefType>()
-        .getElementType()
-        .template cast<T>();
-  }
-
-  /// Arithmetic operator overloadings.
-  Value operator+(Value e);
-  Value operator-(Value e);
-  Value operator*(Value e);
-  Value operator/(Value e);
-  Value operator%(Value e);
-  Value operator^(Value e);
-  Value operator+(TemplatedIndexedValue e) {
-    return *this + static_cast<Value>(e);
-  }
-  Value operator-(TemplatedIndexedValue e) {
-    return *this - static_cast<Value>(e);
-  }
-  Value operator*(TemplatedIndexedValue e) {
-    return *this * static_cast<Value>(e);
-  }
-  Value operator/(TemplatedIndexedValue e) {
-    return *this / static_cast<Value>(e);
-  }
-  Value operator%(TemplatedIndexedValue e) {
-    return *this % static_cast<Value>(e);
-  }
-  Value operator^(TemplatedIndexedValue e) {
-    return *this ^ static_cast<Value>(e);
-  }
-
-  /// Assignment-arithmetic operator overloadings.
-  Store operator+=(Value e);
-  Store operator-=(Value e);
-  Store operator*=(Value e);
-  Store operator/=(Value e);
-  Store operator%=(Value e);
-  Store operator^=(Value e);
-  Store operator+=(TemplatedIndexedValue e) {
-    return this->operator+=(static_cast<Value>(e));
-  }
-  Store operator-=(TemplatedIndexedValue e) {
-    return this->operator-=(static_cast<Value>(e));
-  }
-  Store operator*=(TemplatedIndexedValue e) {
-    return this->operator*=(static_cast<Value>(e));
-  }
-  Store operator/=(TemplatedIndexedValue e) {
-    return this->operator/=(static_cast<Value>(e));
-  }
-  Store operator%=(TemplatedIndexedValue e) {
-    return this->operator%=(static_cast<Value>(e));
-  }
-  Store operator^=(TemplatedIndexedValue e) {
-    return this->operator^=(static_cast<Value>(e));
-  }
-
-  /// Logical operator overloadings.
-  Value operator&&(Value e);
-  Value operator||(Value e);
-  Value operator&&(TemplatedIndexedValue e) {
-    return *this && static_cast<Value>(e);
-  }
-  Value operator||(TemplatedIndexedValue e) {
-    return *this || static_cast<Value>(e);
-  }
-
-  /// Comparison operator overloadings.
-  Value eq(Value e);
-  Value ne(Value e);
-  Value slt(Value e);
-  Value sle(Value e);
-  Value sgt(Value e);
-  Value sge(Value e);
-  Value ult(Value e);
-  Value ule(Value e);
-  Value ugt(Value e);
-  Value uge(Value e);
-  Value slt(TemplatedIndexedValue e) {
-    return slt(*this, static_cast<Value>(e));
-  }
-  Value sle(TemplatedIndexedValue e) {
-    return sle(*this, static_cast<Value>(e));
-  }
-  Value sgt(TemplatedIndexedValue e) {
-    return sgt(*this, static_cast<Value>(e));
-  }
-  Value sge(TemplatedIndexedValue e) {
-    return sge(*this, static_cast<Value>(e));
-  }
-  Value ult(TemplatedIndexedValue e) {
-    return ult(*this, static_cast<Value>(e));
-  }
-  Value ule(TemplatedIndexedValue e) {
-    return ule(*this, static_cast<Value>(e));
-  }
-  Value ugt(TemplatedIndexedValue e) {
-    return ugt(*this, static_cast<Value>(e));
-  }
-  Value uge(TemplatedIndexedValue e) {
-    return uge(*this, static_cast<Value>(e));
-  }
-
-private:
-  TemplatedIndexedValue(Value value, ValueRange indices)
-      : value(value), indices(indices.begin(), indices.end()) {}
-
-  TemplatedIndexedValue &append() { return *this; }
-
-  template <typename T, typename... Args>
-  TemplatedIndexedValue &append(T index, Args... indices) {
-    this->indices.push_back(static_cast<Value>(index));
-    append(indices...);
-    return *this;
-  }
-  Value value;
-  SmallVector<Value, 8> indices;
-};
-
 } // namespace edsc
 } // namespace mlir
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -14,7 +14,7 @@
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/LoopUtils.h"
@@ -41,7 +41,7 @@
 /// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
 /// single-iteration loops. Maps the innermost loops to thread dimensions, in
 /// reverse order to enable access coalescing in the innermost loop.
-static void insertCopyLoops(OpBuilder &builder, Location loc,
+static void insertCopyLoops(OpBuilder &b, Location loc,
                             MemRefBoundsCapture &bounds, Value from, Value to) {
   // Create EDSC handles for bounds.
   unsigned rank = bounds.rank();
@@ -68,24 +68,24 @@
       [](int64_t step) { return std_constant_index(step); });
 
   // Obtain thread identifiers and block sizes, necessary to map to them.
-  auto indexType = builder.getIndexType();
+  auto indexType = b.getIndexType();
   SmallVector<Value, 3> threadIds, blockDims;
   for (unsigned i = 0; i < 3; ++i) {
-    auto dimName = builder.getStringAttr(getDimName(i));
-    threadIds.push_back(
-        builder.create<gpu::ThreadIdOp>(loc, indexType, dimName));
-    blockDims.push_back(
-        builder.create<gpu::BlockDimOp>(loc, indexType, dimName));
+    auto dimName = b.getStringAttr(getDimName(i));
+    threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
+    blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
   }
 
   // Produce the loop nest with copies.
   SmallVector<Value, 8> ivs(lbs.size());
-  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
-    ivs.assign(loopIvs.begin(), loopIvs.end());
-    auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
-    MemRefIndexedValue fromHandle(from), toHandle(to);
-    toHandle(activeIvs) = fromHandle(activeIvs);
-  });
+  mlir::scf::buildLoopNest(
+      b, loc, lbs, ubs, steps,
+      [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
+        ivs.assign(loopIvs.begin(), loopIvs.end());
+        auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
+        Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
+        b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
+      });
 
   // Map the innermost loops to threads in reverse order.
   for (auto en :
@@ -142,17 +142,17 @@
   assert(llvm::hasSingleElement(region) &&
          "unstructured control flow not supported");
 
-  OpBuilder builder(region.getContext());
-  builder.setInsertionPointToStart(&region.front());
+  OpBuilder b(region.getContext());
+  b.setInsertionPointToStart(&region.front());
 
-  ScopedContext edscContext(builder, loc);
+  ScopedContext edscContext(b, loc);
   MemRefBoundsCapture fromBoundsCapture(from);
-  insertCopyLoops(builder, loc, fromBoundsCapture, from, to);
-  builder.create<gpu::BarrierOp>(loc);
+  insertCopyLoops(b, loc, fromBoundsCapture, from, to);
+  b.create<gpu::BarrierOp>(loc);
 
-  builder.setInsertionPoint(&region.front().back());
-  builder.create<gpu::BarrierOp>(loc);
-  insertCopyLoops(builder, loc, fromBoundsCapture, to, from);
+  b.setInsertionPoint(&region.front().back());
+  b.create<gpu::BarrierOp>(loc);
+  insertCopyLoops(b, loc, fromBoundsCapture, to, from);
 }
 
 /// Promotes a function argument to workgroup memory in the given function.
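The MemoryPromotion.cpp hunk above is the template for every call-site migration in this patch: drop the ambient ScopedContext, thread an explicit OpBuilder and Location through the loop builder, and replace MemRefIndexedValue's index notation with explicit memref.load/memref.store ops. A minimal sketch of the resulting pattern, assuming `b`, `loc`, `from`, `to`, `lbs`, `ubs`, and `steps` are in scope as in insertCopyLoops:

  // Copy `from` into `to` element-wise; each body callback receives its own
  // OpBuilder and Location instead of reading them from a ScopedContext.
  mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps,
      [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        Value v = nested.create<memref::LoadOp>(nestedLoc, from, ivs);
        nested.create<memref::StoreOp>(nestedLoc, v, to, ivs);
      });
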
diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
@@ -11,7 +11,6 @@
 #include "mlir/Dialect/Linalg/EDSC/Builders.h"
 #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Math/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/IR/AffineExpr.h"
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -7,16 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
-#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -27,38 +22,63 @@
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
-using namespace mlir::edsc;
-using namespace mlir::edsc::intrinsics;
 using namespace mlir::linalg;
 
-using edsc::op::operator+;
+struct ArithBuilder {
+  ArithBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}
+
+  Value select(Value cmp, Value lhs, Value rhs) {
+    return b.create<SelectOp>(loc, cmp, lhs, rhs);
+  }
+  Value slt(Value lhs, Value rhs) {
+    if (lhs.getType().isa<IntegerType>())
+      return b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);
+    return b.create<CmpFOp>(loc, CmpFPredicate::OLT, lhs, rhs);
+  }
+  Value sgt(Value lhs, Value rhs) {
+    if (lhs.getType().isa<IntegerType>())
+      return b.create<CmpIOp>(loc, CmpIPredicate::sgt, lhs, rhs);
+    return b.create<CmpFOp>(loc, CmpFPredicate::OGT, lhs, rhs);
+  }
+  Value add(Value lhs, Value rhs) {
+    if (lhs.getType().isa<IntegerType>())
+      return b.create<AddIOp>(loc, lhs, rhs);
+    return b.create<AddFOp>(loc, lhs, rhs);
+  }
+  Value mul(Value lhs, Value rhs) {
+    if (lhs.getType().isa<IntegerType>())
+      return b.create<MulIOp>(loc, lhs, rhs);
+    return b.create<MulFOp>(loc, lhs, rhs);
+  }
+
+  OpBuilder &b;
+  Location loc;
+};
+
-static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
-                                                        Location loc,
-                                                        AffineMap map,
-                                                        ArrayRef<Value> vals) {
+static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
+                                                     AffineMap map,
+                                                     ArrayRef<Value> vals) {
   if (map.isEmpty())
     return {};
 
   assert(map.getNumInputs() == vals.size());
-  SmallVector<Value, 8> res;
+  SmallVector<Value> res;
   res.reserve(map.getNumResults());
   auto dims = map.getNumDims();
   for (auto e : map.getResults()) {
     auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
-    SmallVector<Value, 4> operands(vals.begin(), vals.end());
+    SmallVector<Value> operands(vals.begin(), vals.end());
     canonicalizeMapAndOperands(&exprMap, &operands);
-    res.push_back(affine_apply(exprMap, operands));
+    res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
   }
   return res;
 }
 
-template <typename IndexedValueType, typename OpType>
-static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
-                                     ArrayRef<SmallVector<Value, 8>> indexing,
+template <typename LoadOpTy, typename StoreOpTy, typename OpType>
+static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
+                                     ArrayRef<Value> indexedValues,
+                                     ArrayRef<SmallVector<Value>> indexing,
                                      ArrayRef<Value> outputBuffers) {
-  assert(op->getNumRegions() == 1 && "Expected single region op");
-  auto &b = ScopedContext::getBuilderRef();
   auto &block = op->getRegion(0).front();
   BlockAndValueMapping map;
   map.map(block.getArguments(), indexedValues);
@@ -67,26 +87,24 @@
     map.map(op.getResults(), newOp->getResults());
   }
 
-  Operation &terminator = block.back();
-  assert(isa<linalg::YieldOp>(terminator) &&
-         "expected a yield op in the end of the region");
-  for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
-    IndexedValueType O(outputBuffers[i]);
-    O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
+  Operation *terminator = block.getTerminator();
+  for (OpOperand &operand : terminator->getOpOperands()) {
+    Value toStore = map.lookupOrDefault(operand.get());
+    b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
+                        indexing[operand.getOperandNumber()]);
   }
 }
 
 // Returns a pair that contains input indices and output indices of a
 // SingleInputPoolingOp `op`.
 struct InputAndOutputIndices {
-  SmallVector<Value, 8> inputs;
-  SmallVector<Value, 8> outputs;
+  SmallVector<Value> inputs;
+  SmallVector<Value> outputs;
 };
 template <typename SingleInputPoolingOp>
-static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
-                                                      SingleInputPoolingOp op) {
-  auto &b = ScopedContext::getBuilderRef();
-  auto loc = ScopedContext::getLocation();
+static InputAndOutputIndices
+getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
+                         SingleInputPoolingOp op) {
   auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
   auto maps = llvm::to_vector<8>(
       llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
@@ -125,19 +143,18 @@
 /// }
 /// ```
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs,
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+                                     ArrayRef<Value> allIvs,
                                      LinalgOp linalgOp) {
   assert(linalgOp.hasBufferSemantics() &&
          "expected linalg op with buffer semantics");
-  auto &b = ScopedContext::getBuilderRef();
-  auto loc = ScopedContext::getLocation();
   unsigned nInputs = linalgOp.getNumInputs();
   unsigned nOutputs = linalgOp.getNumOutputs();
-  SmallVector<Value, 4> indexedValues;
+  SmallVector<Value> indexedValues;
   indexedValues.reserve(nInputs + nOutputs);
 
-  auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
+  auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());
 
   // TODO: Avoid the loads if the corresponding argument of the
   // region has no uses.
@@ -145,46 +162,40 @@
   for (unsigned i = 0; i < nInputs; ++i) {
     auto indexing = makeCanonicalAffineApplies(
         b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
-    // Passing through IndexedValueType emits the proper load operation.
-    indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
+    indexedValues.push_back(
+        b.create<LoadOpTy>(loc, linalgOp.getInput(i), indexing));
   }
   // 1.b. Emit load from output views.
   for (unsigned i = 0; i < nOutputs; ++i) {
     auto indexing = makeCanonicalAffineApplies(
         b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
-    // Passing through IndexedValueType emits the proper load operation.
     indexedValues.push_back(
-        IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
+        b.create<LoadOpTy>(loc, linalgOp.getOutputBuffer(i), indexing));
   }
 
   // TODO: When a region inliner exists, use it.
   // 2. Inline region, currently only works for a single basic block.
   // 3. Emit store.
-  SmallVector<SmallVector<Value, 8>, 8> indexing;
-  SmallVector<Value, 8> outputBuffers;
+  SmallVector<SmallVector<Value>, 8> indexing;
+  SmallVector<Value> outputBuffers;
   for (unsigned i = 0; i < nOutputs; ++i) {
     indexing.push_back(makeCanonicalAffineApplies(
        b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
     outputBuffers.push_back(linalgOp.getOutputBuffer(i));
   }
-  inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
-                                             outputBuffers);
+  inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
+                                                indexing, outputBuffers);
 }
 
 // Create a padded view into the given `input` tensor using the 'indices'
 // to access the tensor. `skipPadding` lists the dimensions for which no padding
 // is needed e.g. the non-spatial dimensions for convolutions.
-template <typename IndexedValueType>
-Value getPaddedInput(Value input, ArrayRef<Value> indices,
-                     ArrayRef<int> skipPadding, Value padValue) {
-  // TODO: add a level of indirection to linalg.generic.
-
-  IndexedValueType indexedInput(input);
-
-  auto *context = ScopedContext::getContext();
-  Value zeroIndex = std_constant_index(0);
-  SmallVector<Value, 8> conds;
-  SmallVector<Value, 8> clampedImIdx;
+Value getPaddedInput(OpBuilder &b, Location loc, Value input,
+                     ArrayRef<Value> indices, ArrayRef<int> skipPadding,
+                     Value padValue) {
+  Value zeroIndex = b.create<ConstantIndexOp>(loc, 0);
+  SmallVector<Value> conds;
+  SmallVector<Value> clampedImIdx;
   for (auto iter : llvm::enumerate(indices)) {
     int idx = iter.index();
     auto dim = iter.value();
@@ -193,29 +204,33 @@
       continue;
     }
 
-    using edsc::op::sge;
-    using edsc::op::slt;
-    using edsc::op::operator||;
-    Value leftOutOfBound = slt(dim, zeroIndex);
+    Value leftOutOfBound =
+        b.create<CmpIOp>(loc, CmpIPredicate::slt, dim, zeroIndex);
     if (conds.empty())
       conds.push_back(leftOutOfBound);
     else
-      conds.push_back(conds.back() || leftOutOfBound);
-    Value rightBound = memref_dim(input, idx);
-    conds.push_back(conds.back() || (sge(dim, rightBound)));
+      conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
+    Value rightBound = b.create<memref::DimOp>(loc, input, idx);
+    Value rightOutOfBound =
+        b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
+    conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));
 
     // When padding is involved, the indices will only be shifted to negative,
     // so having a max op is enough.
-    auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
-                                 {getAffineDimExpr(/*position=*/0, context),
-                                  getAffineConstantExpr(0, context)},
-                                 context);
-    clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
+    MLIRContext *ctx = input.getContext();
+    AffineExpr m = getAffineDimExpr(/*position=*/0, ctx),
+               zero = getAffineConstantExpr(0, ctx);
+    AffineMap maxMap =
+        AffineMap::inferFromExprList(ArrayRef<ArrayRef<AffineExpr>>{{m, zero}})
+            .front();
+    clampedImIdx.push_back(b.create<AffineMaxOp>(loc, maxMap, ValueRange{dim}));
   }
 
-  Value readInput = indexedInput(clampedImIdx);
-  return conds.empty() ? readInput
-                       : (Value)std_select(conds.back(), padValue, readInput);
+  Value readInput = b.create<memref::LoadOp>(loc, input, clampedImIdx);
+  if (conds.empty())
+    return readInput;
+
+  return b.create<SelectOp>(loc, conds.back(), padValue, readInput);
 }
 
 namespace {
@@ -229,48 +244,47 @@
 }
 template <>
 Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
-  auto &b = ScopedContext::getBuilderRef();
   if (auto floatType = type.dyn_cast<FloatType>()) {
-    return b.getFloatAttr(
-        floatType,
-        APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
+    return OpBuilder(type.getContext())
+        .getFloatAttr(floatType, APFloat::getInf(floatType.getFloatSemantics(),
+                                                 /*Negative*/ true));
   }
   if (auto intType = type.dyn_cast<IntegerType>()) {
     unsigned width = intType.getWidth();
     // The select instruction used to lower the PoolingMin uses a signed
     // comparison, use a signed constant irrespective of the signedness of the
    // integer type.
-    return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
+    return OpBuilder(type.getContext())
+        .getIntegerAttr(intType, APInt::getSignedMinValue(width));
  }
   llvm_unreachable("Unsupported data type for PoolingMaxOp");
   return {};
 }
 template <>
 Attribute getPadValueAttr<PoolingMinOp>(Type type) {
-  auto &b = ScopedContext::getBuilderRef();
   if (auto floatType = type.dyn_cast<FloatType>()) {
-    return b.getFloatAttr(floatType,
-                          APFloat::getInf(floatType.getFloatSemantics()));
+    return OpBuilder(type.getContext())
+        .getFloatAttr(floatType,
+                      APFloat::getInf(floatType.getFloatSemantics()));
   }
   if (auto intType = type.dyn_cast<IntegerType>()) {
     unsigned width = intType.getWidth();
     // The select instruction used to lower the PoolingMin uses a signed
     // comparison, use a signed constant irrespective of the signedness of the
     // integer type.
-    return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
+    return OpBuilder(type.getContext())
+        .getIntegerAttr(intType, APInt::getSignedMaxValue(width));
   }
   llvm_unreachable("Unsupported data type for PoolingMinOp");
   return {};
 }
 template <>
 Attribute getPadValueAttr<PoolingSumOp>(Type type) {
-  auto &b = ScopedContext::getBuilderRef();
-  return b.getZeroAttr(type);
+  return OpBuilder(type.getContext()).getZeroAttr(type);
 }
 template <>
 Attribute getPadValueAttr<ConvOp>(Type type) {
-  auto &b = ScopedContext::getBuilderRef();
-  return b.getZeroAttr(type);
+  return OpBuilder(type.getContext()).getZeroAttr(type);
 }
 
 } // namespace
@@ -284,38 +298,43 @@
   return false;
 }
 
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+                                     ArrayRef<Value> allIvs, ConvOp convOp) {
   assert(convOp.hasBufferSemantics() &&
          "expected linalg op with buffer semantics");
-  auto &b = ScopedContext::getBuilderRef();
-  auto loc = ScopedContext::getLocation();
   auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
   auto maps = llvm::to_vector<8>(
       llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
-  SmallVector<Value, 8> fIdx(
-      makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
-  SmallVector<Value, 8> imIdx(
-      makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
-  SmallVector<Value, 8> oIdx(
-      makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
+  SmallVector<Value> fIdx(makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
+  SmallVector<Value> imIdx(makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
+  SmallVector<Value> oIdx(makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
 
-  IndexedValueType F(convOp.filter()), O(convOp.output());
+  Value filter = convOp.filter(), output = convOp.output();
 
   // Emit scalar form. Padded conv involves an affine.max in the memory access
   // which is not allowed by affine.load. Override to use an MemRefIndexedValue
   // when there is non-zero padding.
   if (hasPadding(convOp)) {
     Type type = convOp.input().getType().cast<MemRefType>().getElementType();
-    Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
-    Value paddedInput = getPaddedInput<MemRefIndexedValue>(
-        convOp.input(), imIdx,
-        /* Only need to pad the window dimensions */
-        {0, static_cast<int>(imIdx.size()) - 1}, padValue);
-    O(oIdx) += F(fIdx) * paddedInput;
+    Value padValue =
+        b.create<ConstantOp>(loc, type, getPadValueAttr<ConvOp>(type));
+    Value paddedInput =
+        getPaddedInput(b, loc, convOp.input(), imIdx,
+                       /* Only need to pad the window dimensions */
+                       {0, static_cast<int>(imIdx.size()) - 1}, padValue);
+    Value filterVal = b.create<memref::LoadOp>(loc, filter, fIdx);
+    Value mulVal = ArithBuilder(b, loc).mul(filterVal, paddedInput);
+    Value outputVal = b.create<memref::LoadOp>(loc, output, oIdx);
+    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
+    b.create<memref::StoreOp>(loc, addVal, output, oIdx);
   } else {
-    IndexedValueType I(convOp.input());
-    O(oIdx) += F(fIdx) * I(imIdx);
+    Value inputVal = b.create<memref::LoadOp>(loc, convOp.input(), imIdx);
+    Value filterVal = b.create<memref::LoadOp>(loc, filter, fIdx);
+    Value mulVal = ArithBuilder(b, loc).mul(filterVal, inputVal);
+    Value outputVal = b.create<memref::LoadOp>(loc, output, oIdx);
+    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
+    b.create<memref::StoreOp>(loc, addVal, output, oIdx);
   }
 }
 
@@ -327,55 +346,62 @@
   return false;
 }
 
-template <typename IndexedValueType, typename PoolingOp>
-static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
+template <typename LoadOpTy, typename StoreOpTy, typename PoolingOp>
+static Value getPoolingInput(OpBuilder &b, Location loc, PoolingOp op,
+                             ArrayRef<Value> inputIndices) {
   if (hasPadding(op)) {
     Type type =
         op.input().getType().template cast<MemRefType>().getElementType();
-    Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
-    return getPaddedInput<IndexedValueType>(op.input(), inputIndices,
-                                            /*Pad every dimension*/ {},
-                                            padValue);
+    Value padValue =
+        b.create<ConstantOp>(loc, type, getPadValueAttr<PoolingOp>(type));
+    return getPaddedInput(b, loc, op.input(), inputIndices,
+                          /*Pad every dimension*/ {}, padValue);
   }
-  IndexedValueType input(op.input());
-  return input(inputIndices);
+  return b.create<LoadOpTy>(loc, op.input(), inputIndices);
 }
 
-template <typename IndexedValueType, typename OpType>
-void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
-  InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
-  // Emit scalar form.
-  IndexedValueType output(op.output());
-  Value lhs = output(indices.outputs);
-  Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
-  using edsc::op::sgt;
-  using edsc::op::slt;
-  Value value = std::is_same<OpType, PoolingMinOp>()
-                    ? std_select(slt(lhs, rhs), lhs, rhs)
-                    : std_select(sgt(lhs, rhs), lhs, rhs);
-  output(indices.outputs) = value;
+template <typename LoadOpTy, typename StoreOpTy, typename OpType>
+void emitPoolingMinMaxScalarImplementation(OpBuilder &b, Location loc,
+                                           ArrayRef<Value> allIvs, OpType op) {
+  InputAndOutputIndices indices = getInputAndOutputIndices(b, loc, allIvs, op);
+  Value lhs = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
+  Value rhs = getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
+  Value value = llvm::TypeSwitch<Operation *, Value>(op)
+                    .Case([&](PoolingMinOp poolingOp) {
+                      return ArithBuilder(b, loc).select(
+                          ArithBuilder(b, loc).slt(lhs, rhs), lhs, rhs);
+                    })
+                    .Case([&](PoolingMaxOp poolingOp) {
+                      return ArithBuilder(b, loc).select(
+                          ArithBuilder(b, loc).sgt(lhs, rhs), lhs, rhs);
+                    })
+                    .Default([&](auto) { return Value(); });
+  b.create<StoreOpTy>(loc, value, op.output(), indices.outputs);
 }
 
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
-  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
-                                                                        op);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+                                     ArrayRef<Value> allIvs, PoolingMaxOp op) {
+  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMaxOp>(
+      b, loc, allIvs, op);
 }
 
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
-  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
-                                                                        op);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+                                     ArrayRef<Value> allIvs, PoolingMinOp op) {
+  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMinOp>(
+      b, loc, allIvs, op);
 }
-template <typename IndexedValueType>
-static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
-  auto indices = getInputAndOutputIndices(allIvs, op);
-  IndexedValueType output(op.output());
-
-  // Emit scalar form.
-  output(indices.outputs) +=
-      getPoolingInput<IndexedValueType>(op, indices.inputs);
+template <typename LoadOpTy, typename StoreOpTy>
+static void emitScalarImplementation(OpBuilder &b, Location loc,
+                                     ArrayRef<Value> allIvs, PoolingSumOp op) {
+  auto indices = getInputAndOutputIndices(b, loc, allIvs, op);
+  Value inputVal =
+      getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
+  Value outputVal = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
+  Value added = ArithBuilder(b, loc).add(outputVal, inputVal);
+  b.create<StoreOpTy>(loc, added, op.output(), indices.outputs);
 }
 
 /// Replace the index operations in the body of the loop nest by the matching
@@ -413,8 +439,12 @@
 template <typename LoopTy>
 static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
                                                  LinalgOp linalgOp) {
-  using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
-  ScopedContext scope(rewriter, linalgOp.getLoc());
+  using LoadOpTy =
+      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
+                                AffineLoadOp, memref::LoadOp>::type;
+  using StoreOpTy =
+      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
+                                AffineStoreOp, memref::StoreOp>::type;
 
   // Canonicalize indexed_generic operations before lowering them to loops.
   if (isa<IndexedGenericOp>(linalgOp))
     return llvm::None;
 
@@ -428,16 +458,18 @@
   auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
   auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());
 
-  SmallVector<Value, 4> allIvs;
+  SmallVector<Value> allIvs;
   GenerateLoopNest<LoopTy>::doit(
-      loopRanges, linalgOp, iteratorTypes,
-      [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
+      rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
+      [&](OpBuilder &b, Location loc, ValueRange ivs,
+          ValueRange iterArgs) -> scf::ValueVector {
         assert(iterArgs.empty() && "unexpected iterArgs");
         allIvs.append(ivs.begin(), ivs.end());
         llvm::TypeSwitch<Operation *>(linalgOp)
            .Case<ConvOp, PoolingMaxOp, PoolingMinOp, PoolingSumOp, LinalgOp>(
                [&](auto op) {
-                  emitScalarImplementation<IndexedValueTy>(allIvs, op);
+                  emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs,
+                                                                op);
                })
            .Default([&](Operation *op) { assert(false && "unexpected op"); });
        return scf::ValueVector{};
@@ -499,7 +531,7 @@
       tiledLoop.upperBound(), tiledLoop.step(),
       [&](OpBuilder &builder, Location loc, ValueRange ivs) {
         // Move body without its terminator.
-        SmallVector<Value, 8> newBlockArgs;
+        SmallVector<Value> newBlockArgs;
         newBlockArgs.append(ivs.begin(), ivs.end());
         newBlockArgs.append(tiledLoop.inputs().begin(),
                             tiledLoop.inputs().end());
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -19,7 +19,6 @@
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExpr.h"
@@ -225,69 +224,67 @@
   // 2. Create the tiled loops.
   LinalgOp res = op;
   SmallVector<Value, 4> ivs, tensorResults;
-  GenerateLoopNest<LoopTy>::doit(
-      loopRanges, op, iteratorTypes,
-      [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
-        auto &b = ScopedContext::getBuilderRef();
-        auto loc = ScopedContext::getLocation();
-        ivs.assign(localIvs.begin(), localIvs.end());
-
-        // When an `interchangeVector` is present, it has been applied to the
-        // loop ranges and the iterator types. Apply its inverse to the
-        // resulting loop `ivs` to match the op definition.
-        SmallVector<Value, 4> interchangedIvs;
-        if (!options.interchangeVector.empty())
-          interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
-        else
-          interchangedIvs.assign(ivs.begin(), ivs.end());
-
-        assert(op.getNumOutputTensors() == iterArgs.size() &&
-               "num output tensors must match number of loop iter arguments");
-
-        auto operands = llvm::to_vector<4>(op.getInputs());
-        SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
-        // TODO: thanks to simplifying assumption we do not need to worry about
-        // order of output buffers and tensors: there is only ever one kind.
-        assert(outputBuffers.empty() || iterArgs.empty());
-        operands.append(outputBuffers.begin(), outputBuffers.end());
-        operands.append(iterArgs.begin(), iterArgs.end());
-        auto sizeBounds =
-            applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
-        SmallVector<Value, 4> tiledOperands = makeTiledShapes(
-            b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
-        auto nonShapedOperands = op.getAssumedNonShapedOperands();
-        tiledOperands.append(nonShapedOperands.begin(),
-                             nonShapedOperands.end());
-
-        // TODO: use an interface/adaptor to avoid leaking position in
-        // `tiledOperands`.
-        SmallVector<Type, 4> resultTensorTypes;
-        for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
-          resultTensorTypes.push_back(
-              tiledOperands[opOperand->getOperandNumber()].getType());
-
-        res = op.clone(b, loc, resultTensorTypes, tiledOperands);
-
-        // Insert a subtensor_insert for each output tensor.
-        unsigned resultIdx = 0;
-        for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
-          // TODO: use an interface/adaptor to avoid leaking position in
-          // `tiledOperands`.
-          Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
-          if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
-            tensorResults.push_back(b.create<SubTensorInsertOp>(
-                loc, subtensor.source().getType(), res->getResult(resultIdx),
-                subtensor.source(), subtensor.offsets(), subtensor.sizes(),
-                subtensor.strides(), subtensor.static_offsets(),
-                subtensor.static_sizes(), subtensor.static_strides()));
-          } else {
-            tensorResults.push_back(res->getResult(resultIdx));
-          }
-          ++resultIdx;
-        }
-        return scf::ValueVector(tensorResults.begin(), tensorResults.end());
-      },
-      options.distribution);
+  auto tiledLoopBodyBuilder = [&](OpBuilder &b, Location loc,
+                                  ValueRange localIvs,
+                                  ValueRange iterArgs) -> scf::ValueVector {
+    ivs.assign(localIvs.begin(), localIvs.end());
+
+    // When an `interchangeVector` is present, it has been applied to the
+    // loop ranges and the iterator types. Apply its inverse to the
+    // resulting loop `ivs` to match the op definition.
+    SmallVector<Value, 4> interchangedIvs;
+    if (!options.interchangeVector.empty())
+      interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
+    else
+      interchangedIvs.assign(ivs.begin(), ivs.end());
+
+    assert(op.getNumOutputTensors() == iterArgs.size() &&
+           "num output tensors must match number of loop iter arguments");
+
+    auto operands = llvm::to_vector<4>(op.getInputs());
+    SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
+    // TODO: thanks to simplifying assumption we do not need to worry about
+    // order of output buffers and tensors: there is only ever one kind.
+    assert(outputBuffers.empty() || iterArgs.empty());
+    operands.append(outputBuffers.begin(), outputBuffers.end());
+    operands.append(iterArgs.begin(), iterArgs.end());
+    auto sizeBounds =
+        applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
+    SmallVector<Value, 4> tiledOperands = makeTiledShapes(
+        b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
+    auto nonShapedOperands = op.getAssumedNonShapedOperands();
+    tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end());
+
+    // TODO: use an interface/adaptor to avoid leaking position in
+    // `tiledOperands`.
+    SmallVector<Type, 4> resultTensorTypes;
+    for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
+      resultTensorTypes.push_back(
+          tiledOperands[opOperand->getOperandNumber()].getType());
+
+    res = op.clone(b, loc, resultTensorTypes, tiledOperands);
+
+    // Insert a subtensor_insert for each output tensor.
+    unsigned resultIdx = 0;
+    for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
+      // TODO: use an interface/adaptor to avoid leaking position in
+      // `tiledOperands`.
+      Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
+      if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
+        tensorResults.push_back(b.create<SubTensorInsertOp>(
+            loc, subtensor.source().getType(), res->getResult(resultIdx),
+            subtensor.source(), subtensor.offsets(), subtensor.sizes(),
+            subtensor.strides(), subtensor.static_offsets(),
+            subtensor.static_sizes(), subtensor.static_strides()));
+      } else {
+        tensorResults.push_back(res->getResult(resultIdx));
+      }
+      ++resultIdx;
+    }
+    return scf::ValueVector(tensorResults.begin(), tensorResults.end());
+  };
+  GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
+                                 tiledLoopBodyBuilder, options.distribution);
 
   // 3. Transform IndexOp results w.r.t. the tiling.
   transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -197,15 +196,14 @@
 /// Specialization to build an scf "for" nest.
 template <>
 void GenerateLoopNest<scf::ForOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions> distributionOptions) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   // Create procInfo so it dominates loops, if appropriate.
-  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
-  Location loc = edsc::ScopedContext::getLocation();
-
   SmallVector<ProcInfo, 4> procInfo;
   SmallVector<DistributionMethod, 0> distributionMethod;
   if (distributionOptions.hasValue()) {
@@ -219,13 +217,13 @@
     distributionMethod = distributionOptions->distributionMethod;
     if (distributionMethod.size() < parallelLoopRanges.size())
       parallelLoopRanges.resize(distributionMethod.size());
-    procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
+    procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
   }
 
   SmallVector<Value, 4> lbs, ubs, steps;
   unpackRanges(loopRanges, lbs, ubs, steps);
-  LoopNest loopNest =
-      edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
+  LoopNest loopNest = mlir::scf::buildLoopNest(
+      b, loc, lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
 
   if (!distributionOptions || loopNest.loops.empty())
     return;
@@ -246,9 +244,11 @@
 /// Specialization to build affine "for" nest.
 template <>
 void GenerateLoopNest<AffineForOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions>) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
@@ -264,38 +264,36 @@
     constantSteps.push_back(op.getValue());
   }
 
-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  edsc::affineLoopNestBuilder(lbs, ubs, constantSteps,
-                              bodyBuilderWithoutIterArgsFn);
+  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
+                            [&](OpBuilder &b, Location loc, ValueRange ivs) {
+                              bodyBuilderFn(b, loc, ivs, {});
+                            });
 }
 
 /// Specialization to build an linalg.tiled_loop
 template <>
 void GenerateLoopNest<TiledLoopOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions>) {
-  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
-  Location loc = edsc::ScopedContext::getLocation();
   SmallVector<ProcInfo, 2> procInfo;
-
   SmallVector<Value, 4> lbs, ubs, steps;
   unpackRanges(loopRanges, lbs, ubs, steps);
 
   auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
                               ValueRange ivs, ValueRange inputs,
                               ValueRange outputs) {
-    ScopedContext context(nestedBuilder, nestedLoc);
-    scf::ValueVector results = bodyBuilderFn(ivs, linalgOp.getOutputTensors());
+    scf::ValueVector results = bodyBuilderFn(nestedBuilder, nestedLoc, ivs,
+                                             linalgOp.getOutputTensors());
     nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
   };
 
-  auto tiledLoop = builder.create<TiledLoopOp>(
+  auto tiledLoop = b.create<TiledLoopOp>(
       loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
-      builder.getArrayAttr(iteratorTypes), wrappedBuilderFn);
+      b.getArrayAttr(iteratorTypes), wrappedBuilderFn);
 
   // Replace inputs/outputs with the corresponding region args.
   auto isInsideTiledLoop = [&](OpOperand &operand) {
@@ -310,9 +308,9 @@
 }
 
 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and
 /// `step`.
-void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
-                                       Value procId, Value nprocs, Value &lb,
-                                       Value &ub, Value &step) {
+void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
+                                       Value nprocs, Value &lb, Value &ub,
+                                       Value &step) {
   using edsc::op::operator+;
   using edsc::op::operator*;
   lb = lb + (procId * step);
@@ -329,20 +327,22 @@
 // TODO: this function can be made iterative instead. However, it
 // will have at most as many recursive calls as nested loops, which rarely
 // exceeds 10.
-static void
-generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                         ArrayRef<Attribute> iteratorTypes,
-                         function_ref<void(ValueRange)> bodyBuilderFn,
-                         SmallVectorImpl<Value> &ivStorage,
-                         ArrayRef<DistributionMethod> distributionMethod = {}) {
+static void generateParallelLoopNest(
+    OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
+    SmallVectorImpl<Value> &ivStorage,
+    ArrayRef<DistributionMethod> distributionMethod = {}) {
   assert(lbs.size() == ubs.size());
   assert(lbs.size() == steps.size());
   assert(lbs.size() == iteratorTypes.size());
 
   // If there are no (more) loops to be generated, generate the body and be
   // done with it.
-  if (iteratorTypes.empty())
-    return bodyBuilderFn(ivStorage);
+  if (iteratorTypes.empty()) {
+    bodyBuilderFn(b, loc, ivStorage);
+    return;
+  }
 
   // Find the outermost parallel loops and drop their types from the list.
   unsigned nLoops = iteratorTypes.size();
@@ -353,27 +353,29 @@
   // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
   // in this case.
   if (nOuterPar == 0) {
-    edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
-      ivStorage.push_back(iv);
-      generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
-                               steps.drop_front(), iteratorTypes.drop_front(),
-                               bodyBuilderFn, ivStorage, distributionMethod);
-    });
+    LoopNest singleLoop = buildLoopNest(
+        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
+        [&](OpBuilder &b, Location loc, ValueRange ivs) {
+          ivStorage.append(ivs.begin(), ivs.end());
+          generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
+                                   steps.drop_front(),
+                                   iteratorTypes.drop_front(), bodyBuilderFn,
+                                   ivStorage, distributionMethod);
+        });
     return;
   }
 
   if (distributionMethod.empty()) {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
        steps.take_front(nOuterPar),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
-              lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
-              steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
-              bodyBuilderFn, ivStorage,
+              nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
+              ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
+              iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
              (distributionMethod.size() < nOuterPar)
                  ? ArrayRef<DistributionMethod>()
                  : distributionMethod.drop_front(nOuterPar));
@@ -394,15 +396,14 @@
   case DistributionMethod::Cyclic: {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(numProcessed), ubs.take_front(numProcessed),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
-              lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
-              steps.drop_front(numProcessed),
+              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
+              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
              (distributionMethod.size() < numProcessed)
                  ? ArrayRef<DistributionMethod>()
@@ -418,12 +419,13 @@
     for (unsigned i = 1; i < numProcessed; ++i)
       cond = cond && slt(lbs[i], ubs[i]);
     ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
-    edsc::conditionBuilder(cond, [&]() {
+    b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
       generateParallelLoopNest(
-          lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+          b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
          steps.drop_front(numProcessed),
          iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
          distributionMethod.drop_front(numProcessed));
+      b.create<scf::YieldOp>(loc, ValueRange{});
     });
     return;
  }
@@ -432,7 +434,7 @@
   // with inner loop generation.
   ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
   generateParallelLoopNest(
-      lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+      b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
       steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
       bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
   return;
 }
 
@@ -442,9 +444,11 @@
 /// Specialization for generating a mix of parallel and sequential scf loops.
 template <>
 void GenerateLoopNest<scf::ParallelOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions> distributionOptions) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
@@ -466,7 +470,7 @@
   SmallVector<DistributionMethod, 0> distributionMethod;
   if (distributionOptions) {
     auto &options = distributionOptions.getValue();
-    OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
+    OpBuilder &b = edsc::ScopedContext::getBuilderRef();
     Location loc = edsc::ScopedContext::getLocation();
     distributionMethod.assign(distributionOptions->distributionMethod.begin(),
                               distributionOptions->distributionMethod.end());
@@ -478,14 +482,14 @@
     if (distributionMethod.size() < parallelLoopRanges.size())
       parallelLoopRanges.resize(distributionMethod.size());
     SmallVector<ProcInfo, 2> procInfo =
-        options.procInfo(builder, loc, parallelLoopRanges);
+        options.procInfo(b, loc, parallelLoopRanges);
     unsigned index = 0;
     for (auto iteratorType : enumerate(iteratorTypes)) {
       if (index >= procInfo.size())
         break;
       if (isParallelIteratorType(iteratorType.value())) {
         unsigned i = iteratorType.index();
-        updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
+        updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
                                           procInfo[index].nprocs, lbsStorage[i],
                                           ubsStorage[i], stepsStorage[i]);
         index++;
@@ -493,17 +497,17 @@
     }
   }
   ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
-                           bodyBuilderWithoutIterArgsFn, ivs,
-                           distributionMethod);
+  generateParallelLoopNest(
+      b, loc, lbs, ubs, steps, iteratorTypes,
+      [&](OpBuilder &b, Location loc, ValueRange ivs) {
+        bodyBuilderFn(b, loc, ivs, {});
+      },
+      ivs, distributionMethod);
 
   assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }
 
-SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
                                       LinalgOp linalgOp,
                                       ArrayRef<Value> tiledOperands,
                                       ValueRange ivs, ValueRange tileSizes,
                                       ValueRange sizeBounds) {
@@ -529,7 +533,7 @@
     LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
   }
 
-  MLIRContext *context = builder.getContext();
+  MLIRContext *context = b.getContext();
   SmallVector<Value, 4> tiledShapes;
   tiledShapes.reserve(tiledOperands.size());
   for (auto en : llvm::enumerate(tiledOperands)) {
@@ -555,10 +559,10 @@
     for (unsigned r = 0; r < rank; ++r) {
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
       if (!isTiled(map.getSubMap({r}), tileSizes)) {
-        offsets.push_back(builder.getIndexAttr(0));
+        offsets.push_back(b.getIndexAttr(0));
         Value dim = memref_dim(shapedOp, r).value;
         sizes.push_back(dim);
-        strides.push_back(builder.getIndexAttr(1));
+        strides.push_back(b.getIndexAttr(1));
         LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
         continue;
       }
@@ -568,10 +572,9 @@
       // (i.e. the op does not subsample, stepping occurs in the loop).
       auto m = map.getSubMap({r});
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n");
-      auto offset = applyMapToValues(builder, loc, m, lbs).front();
+      auto offset = applyMapToValues(b, loc, m, lbs).front();
       offsets.push_back(offset);
-      auto closedIntSize =
-          applyMapToValues(builder, loc, m, subShapeSizes).front();
+      auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
       // Resulting size needs to be made half open interval again.
       auto size = closedIntSize + std_constant_index(1);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
@@ -589,27 +592,29 @@
       AffineExpr dim0, dim1, dim2;
       bindDims(context, dim0, dim1, dim2);
       // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
-      auto minMap = AffineMap::get(
-          /*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
-      Value d = memref_dim(shapedOp, r);
+      AffineMap minMap =
+          AffineMap::inferFromExprList(
+              ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+              .front();
+      Value d = b.create<memref::DimOp>(loc, shapedOp, r);
       SmallVector<Value, 4> operands{size, d, offset};
       fullyComposeAffineMapAndOperands(&minMap, &operands);
-      size = affine_min(builder.getIndexType(), minMap, operands);
+      size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
     }
 
     sizes.push_back(size);
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new offset: " << offset << "\n");
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n");
-    strides.push_back(builder.getIndexAttr(1));
+    strides.push_back(b.getIndexAttr(1));
   }
 
   if (shapedType.isa<MemRefType>())
-    tiledShapes.push_back(builder.create<memref::SubViewOp>(
-        loc, shapedOp, offsets, sizes, strides));
+    tiledShapes.push_back(
+        b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
   else
     tiledShapes.push_back(
-        builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+        b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
   }
 
   return tiledShapes;
diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt
--- a/mlir/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_mlir_dialect_library(MLIRSCF
   SCF.cpp
-  EDSC/Builders.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/LoopOps
@@ -9,7 +8,6 @@
   MLIRSCFOpsIncGen
 
   LINK_LIBS PUBLIC
-  MLIREDSC
   MLIRIR
   MLIRLoopLikeInterface
   MLIRMemRef
diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
deleted file mode 100644
--- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
deleted file mode 100644
--- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
-
-using namespace mlir;
-using namespace mlir::edsc;
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                            function_ref<void(ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          fun(ivs);
-      });
-}
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
-                            function_ref<void(Value)> fun) {
-  // Delegates to the ValueRange-based version by wrapping the lambda.
-  auto wrapper = [&](ValueRange ivs) {
-    assert(ivs.size() == 1);
-    if (fun)
-      fun(ivs[0]);
-  };
-  return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
-                         wrapper);
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(Value, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
-      step, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        assert(ivs.size() == 1 && "expected one induction variable");
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs[0], args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    ValueRange lbs, ValueRange ubs, ValueRange steps,
-    ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs, args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapIfBody(function_ref<scf::ValueVector()> body, TypeRange expectedTypes) {
-  (void)expectedTypes;
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    scf::ValueVector returned = body();
-    assert(ValueRange(returned).getTypes() == expectedTypes &&
-           "'if' body builder returned values of unexpected type");
-    builder.create<scf::YieldOp>(loc, returned);
-  };
-}
-
-ValueRange
-mlir::edsc::conditionBuilder(TypeRange results, Value condition,
-                             function_ref<scf::ValueVector()> thenBody,
-                             function_ref<scf::ValueVector()> elseBody,
-                             scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), results, condition,
-      wrapIfBody(thenBody, results), wrapIfBody(elseBody, results));
-  if (ifOp)
-    *ifOp = newOp;
-  return newOp.getResults();
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapZeroResultIfBody(function_ref<void()> body) {
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    body();
-    builder.create<scf::YieldOp>(loc);
-  };
-}
-
-ValueRange mlir::edsc::conditionBuilder(Value condition,
-                                        function_ref<void()> thenBody,
-                                        function_ref<void()> elseBody,
-                                        scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), condition, wrapZeroResultIfBody(thenBody),
-      elseBody ?
llvm::function_ref( - wrapZeroResultIfBody(elseBody)) - : llvm::function_ref(nullptr)); - if (ifOp) - *ifOp = newOp; - return {}; -} diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -24,18 +24,18 @@ // CHECK-SAME: [[M:arg[0-9]+]]: index // CHECK-SAME: [[N:arg[0-9]+]]: index // CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: affine.for %{{.*}} = 0 to %{{.*}} { -// CHECK: affine.for %{{.*}} = 0 to %{{.*}} { -// CHECK: affine.for %{{.*}} = 0 to %{{.*}} { -// CHECK-DAG: %[[a:.*]] = affine.load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECK-DAG: %[[b:.*]] = affine.load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECK: %[[A:.*]] = memref.view %{{.*}} : memref to memref +// CHECK: %[[B:.*]] = memref.view %{{.*}} : memref to memref +// CHECK: %[[C:.*]] = memref.view %{{.*}} : memref to memref +// CHECK: affine.for +// CHECK: affine.for +// CHECK: affine.for +// CHECK-DAG: %[[a:.*]] = affine.load %[[A]]{{.*}} : memref +// CHECK-DAG: %[[b:.*]] = affine.load %[[B]]{{.*}} : memref // CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECK-DAG: %[[c:.*]] = affine.load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECK-DAG: %[[c:.*]] = affine.load %[[C]]{{.*}} : memref // CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECK: affine.store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECK: affine.store %[[res]], %[[C]]{{.*}} : memref func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref, memref, memref @@ -49,12 +49,12 @@ // CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref -// CHECK: affine.for %{{.*}} = 0 to %[[B]] { -// CHECK: affine.for %{{.*}} = 0 to %[[X0]] { -// CHECK: affine.for %{{.*}} = 0 to %[[K]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Q]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { -// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECK: affine.for {{.*}}0 to %[[B]] { +// CHECK: affine.for {{.*}}0 to %[[X0]] { +// CHECK: affine.for {{.*}}0 to %[[K]] { +// CHECK: affine.for {{.*}}0 to %[[Q]] { +// CHECK: affine.for {{.*}}0 to %[[Z0]] { +// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]{{.*}} // No padding needed here; only affine loads. 
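As the comments in these tests note, `affine.load` is only legal when every index is a valid affine operand (a dim, a symbol, or an `affine.apply` result); the `affine.max` clamp that padding introduces disqualifies the access, so the padded cases below switch to `memref.load`. A sketch of that choice on the emitter side (the helper name and boolean flag are illustrative only):

```c++
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"

using namespace mlir;

// Sketch only: use affine.load where the indices are affine values, and fall
// back to memref.load for clamped (affine.max) indices.
static Value emitConvLoad(OpBuilder &b, Location loc, Value mem,
                          ValueRange indices, bool indicesAreAffine) {
  if (indicesAreAffine)
    return b.create<AffineLoadOp>(loc, mem, indices);
  return b.create<memref::LoadOp>(loc, mem, indices);
}
```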
// CHECK-NEXT: affine.load // CHECK-NEXT: affine.load @@ -78,26 +78,26 @@ // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref -// CHECK: affine.for %{{.*}} = 0 to %[[B]] { -// CHECK: affine.for %{{.*}} = 0 to %[[X0]] { -// CHECK: affine.for %{{.*}} = 0 to %[[X1]] { -// CHECK: affine.for %{{.*}} = 0 to %[[K]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Q]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] { -// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECK: affine.for {{.*}}0 to %[[B]] { +// CHECK: affine.for {{.*}}0 to %[[X0]] { +// CHECK: affine.for {{.*}}0 to %[[X1]] { +// CHECK: affine.for {{.*}}0 to %[[K]] { +// CHECK: affine.for {{.*}}0 to %[[Q]] { +// CHECK: affine.for {{.*}}0 to %[[Z0]] { +// CHECK: affine.for {{.*}}0 to %[[Z1]] { +// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}} +// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}} // CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) // CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) // Padded conv involves an affine.max in the memory access and this is not // allowed by affine.load. Use memref.load in such cases. -// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECK: select {{.*}} : f32 +// CHECK: affine.load +// CHECK: mulf {{.*}} : f32 +// CHECK: affine.load +// CHECK: addf {{.*}} : f32 +// CHECK: affine.store //----------------------------------------------------------------------------// // Named ops to loops. @@ -115,10 +115,10 @@ // CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref // CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref // CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref -// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] { -// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] { -// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] { -// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] { +// CHECK: affine.for %[[b:.*]] = {{.*}}0 to %[[B]] { +// CHECK: affine.for %[[m:.*]] = {{.*}}0 to %[[M]] { +// CHECK: affine.for %[[n:.*]] = {{.*}}0 to %[[N]] { +// CHECK: affine.for %[[k:.*]] = {{.*}}0 to %[[K]] { // CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref // CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref // CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -1,21 +1,21 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck --check-prefix=CHECKLOOP %s +// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s // RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s // Test that we can lower all the way to LLVM without crashing, don't check results here. 
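With the EDSC layer gone, the `scf.for` nests that the matmul/matvec/dot checks below match come from calling `scf::buildLoopNest` directly, the same entry point the deleted `loopNestBuilder` wrappers above delegated to. A minimal sketch under that assumption, with placeholder bound and step values:

```c++
#include "mlir/Dialect/SCF/SCF.h"

using namespace mlir;

// Sketch only: build an scf.for nest over (lbs, ubs, steps); the body callback
// receives an explicit builder, location, and the induction variables, with no
// ScopedContext involved.
static void buildNest(OpBuilder &b, Location loc, ValueRange lbs,
                      ValueRange ubs, ValueRange steps) {
  scf::buildLoopNest(b, loc, lbs, ubs, steps,
                     [](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
                       // Emit the loop body here using `nested`/`nestedLoc`.
                     });
}
```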
// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -o=/dev/null 2>&1 -// CHECKLOOP-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECKLOOP-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECKLOOP-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECKLOOP-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> -// CHECKLOOP-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> +// CHECK-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> +// CHECK-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> +// CHECK-DAG: #[[$clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> -// CHECKLOOP-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> -// CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> -// CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> -// CHECKLOOP-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> -// CHECKLOOP-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> +// CHECK-DAG: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECK-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> +// CHECK-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECK-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> +// CHECK-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECKPARALLEL-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> @@ -30,7 +30,6 @@ // CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding1:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 1)> // CHECKPARALLEL-DAG: #[[$stride1Dilation1Padding2:.*]] = affine_map<(d0, d1) -> (d0 + d1 - 2)> - func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { %c0 = constant 0 : index %c1 = constant 1 : index @@ -41,22 +40,22 @@ outs(%C: memref) return } -// CHECKLOOP-LABEL: func @matmul(%{{.*}}: memref, -// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[N:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// 
CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @matmul(%{{.*}}: memref, +// CHECK-SAME: [[M:arg[0-9]+]]: index +// CHECK-SAME: [[N:arg[0-9]+]]: index +// CHECK-SAME: [[K:arg[0-9]+]]: index +// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: scf.for {{.*}} to %[[M]] +// CHECK: scf.for {{.*}} to %[[N]] +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref, // CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index @@ -65,8 +64,8 @@ // CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel {{.*}} to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { +// CHECKPARALLEL: scf.for {{.*}} to %[[K]] // CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -86,20 +85,20 @@ outs(%4 : memref) return } -// CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref, -// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref +// CHECK-LABEL: func @matvec(%{{.*}}: memref, +// CHECK-SAME: [[M:arg[0-9]+]]: index +// CHECK-SAME: [[K:arg[0-9]+]]: index +// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: scf.for {{.*}} to %[[M]] +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref +// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref +// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref // CHECKPARALLEL-LABEL: func 
@matvec(%{{.*}}: memref, // CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index @@ -108,7 +107,7 @@ // CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.for {{.*}} to %[[K]] // CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -127,25 +126,25 @@ outs(%3 : memref) return } -// CHECKLOOP-LABEL: func @dot(%{{.*}}: memref, -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref to memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %[[C]][] : memref +// CHECK-LABEL: func @dot(%{{.*}}: memref, +// CHECK-SAME: [[K:arg[0-9]+]]: index +// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref to memref +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref +// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref +// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][] : memref +// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECK: store %[[res]], %[[C]][] : memref // CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref, // CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index // CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref to memref -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.for {{.*}} to %[[K]] // CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -160,21 +159,21 @@ outs(%arg2: memref) return } -// CHECKLOOP-LABEL: func @dot_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c0 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %{{.*}}[] : memref +// CHECK-LABEL: func @dot_view( +// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: 
memref) { +// CHECK: %[[K:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref +// CHECK-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref +// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 +// CHECK-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref +// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 +// CHECK: store %[[res]], %{{.*}}[] : memref // CHECKPARALLEL-LABEL: func @dot_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c0 : memref -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.for {{.*}} to %[[K]] // CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -186,10 +185,10 @@ linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECKLOOP-LABEL: func @fill_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref +// CHECK-LABEL: func @fill_view( +// CHECK: %{{.*}}: memref, %{{.*}}: f32) { +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-LABEL: func @fill_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { @@ -200,8 +199,8 @@ linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECKLOOP-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref +// CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { +// CHECK: store %{{.*}}, %{{.*}}[] : memref // CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref @@ -210,27 +209,27 @@ linalg.fill(%arg0, %arg1) : memref, f32 return } -// CHECKLOOP-LABEL: func @fill_view3( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @fill_view3( +// CHECK: %{{.*}}: memref, %{{.*}}: f32) { +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: store %{{.*}}, {{.*}} : memref // CHECKPARALLEL-LABEL: func @fill_view3( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref func @copy_view(%arg0: memref, %arg1: memref) { linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECKLOOP-LABEL: func @copy_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref -// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref +// CHECK-LABEL: func @copy_view( +// CHECK: %{{.*}}: memref, %{{.*}}: memref) { +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref +// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref // 
CHECKPARALLEL-LABEL: func @copy_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { @@ -242,12 +241,12 @@ linalg.copy(%arg0, %arg1) : memref, memref return } -// CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[] : memref -// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref +// CHECK-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { +// CHECK: memref.load %{{.*}}[] : memref +// CHECK: store %{{.*}}, %{{.*}}[] : memref // CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[] : memref +// CHECKPARALLEL: memref.load %{{.*}}[] : memref // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @copy_view3(%arg0: memref, %arg1: memref) { @@ -256,43 +255,43 @@ memref, memref return } -// CHECKLOOP-LABEL: func @copy_view3 -// CHECKLOOP: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @copy_view3 +// CHECK: (%{{.*}}: memref, %{{.*}}: memref) { +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: scf.for {{.*}} to %{{.*}} +// CHECK: %[[L:.*]] = memref.load {{.*}} : memref +// CHECK: store %[[L]], {{.*}} : memref // CHECKPARALLEL-LABEL: func @copy_view3 // CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[L:.*]] = memref.load {{.*}} : memref +// CHECKPARALLEL: store %[[L]], {{.*}} : memref func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref, memref, memref return } -// CHECKLOOP-LABEL: func @conv_view3( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref -// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c1 : memref -// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c2 : memref -// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @conv_view3( +// CHECK: %{{.*}}: memref, %{{.*}}: memref, 
%{{.*}}: memref) { +// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: %[[Q:.*]] = memref.dim %arg0, %c1 : memref +// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref +// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: scf.for {{.*}} to %[[B]] +// CHECK: scf.for {{.*}} to %[[X0]] +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK: scf.for {{.*}} to %[[Q]] +// CHECK: scf.for {{.*}} to %[[Z0]] +// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]] +// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECK: memref.load {{.*}} : memref +// CHECK: mulf +// CHECK: memref.load {{.*}} : memref +// CHECK: addf +// CHECK: store %{{.*}}, {{.*}} : memref // CHECKPARALLEL-LABEL: func @conv_view3( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { @@ -302,44 +301,44 @@ // CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref // CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] +// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] +// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]] +// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: mulf +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: addf +// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref func @conv_view4(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref, memref, memref return } -// CHECKLOOP-LABEL: func @conv_view4( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref -// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref -// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c2 : memref -// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref -// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[SUM1:.*]] = affine.apply 
#[[$stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @conv_view4( +// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref +// CHECK: scf.for {{.*}} to %[[B]] +// CHECK: scf.for {{.*}} to %[[X0]] +// CHECK: scf.for {{.*}} to %[[X1]] +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK: scf.for {{.*}} to %[[Q]] +// CHECK: scf.for {{.*}} to %[[Z0]] +// CHECK: scf.for {{.*}} to %[[Z1]] +// CHECK: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]] +// CHECK: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]] +// CHECK: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECK: memref.load {{.*}} : memref +// CHECK: mulf +// CHECK: memref.load {{.*}} : memref +// CHECK: addf +// CHECK: store %{{.*}}, {{.*}} : memref // CHECKPARALLEL-LABEL: func @conv_view4( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { @@ -351,17 +350,17 @@ // CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] +// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] +// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]] +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]] +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]] +// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: mulf +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: addf +// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref func @conv_padding(%arg0: memref, %arg1: memref, @@ -372,34 +371,34 @@ memref, 
memref, memref return } -// CHECKLOOP-LABEL: func @conv_padding -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref -// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref -// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c2 : memref -// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref -// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) -// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @conv_padding +// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref +// CHECK: scf.for {{.*}} to %[[B]] +// CHECK: scf.for {{.*}} to %[[X0]] +// CHECK: scf.for {{.*}} to %[[X1]] +// CHECK: scf.for {{.*}} to %[[K]] +// CHECK: scf.for {{.*}} to %[[Q]] +// CHECK: scf.for {{.*}} to %[[Z0]] +// CHECK: scf.for {{.*}} to %[[Z1]] +// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}} +// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}} +// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) +// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) +// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECK: select %{{.*}}, +// CHECK: memref.load {{.*}} : memref +// CHECK: mulf +// CHECK: memref.load {{.*}} : memref +// CHECK: addf +// CHECK: store %{{.*}}, {{.*}} : memref // CHECKPARALLEL-LABEL: func @conv_padding // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { @@ -412,20 +411,20 @@ // CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step 
(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: scf.for {{.*}} to %[[Q]] +// CHECKPARALLEL: scf.for {{.*}} to %[[Z0]] +// CHECKPARALLEL: scf.for {{.*}} to %[[Z1]] +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}} +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}} // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKPARALLEL: select %{{.*}}, +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: mulf +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: addf +// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref func @pooling_max(%arg0: memref, %arg1: memref, @@ -434,36 +433,36 @@ memref, memref, memref return } -// CHECKLOOP-LABEL: func @pooling_max -// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @pooling_max +// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: scf.for {{.*}} to %[[OX]] +// CHECK: scf.for {{.*}} to %[[OY]] +// CHECK: scf.for {{.*}} to %[[WX]] +// CHECK: scf.for {{.*}} to %[[WY]] +// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] +// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] +// CHECK: memref.load {{.*}} : memref +// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECK: %[[RES:.*]] = select %{{.*}}, +// CHECK: store %[[RES]], {{.*}} : memref // CHECKPARALLEL-LABEL: func @pooling_max // CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref // 
CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref // CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref // CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) +// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] +// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]] +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]] +// CHECKPARALLEL: memref.load {{.*}} : memref +// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, +// CHECKPARALLEL: store %[[RES]], {{.*}} : memref func @pooling_max_padding(%arg0: memref, %arg1: memref, @@ -472,26 +471,26 @@ memref, memref, memref return } -// CHECKLOOP-LABEL: func @pooling_max_padding -// CHECKLOOP: %[[PAD:.*]] = constant 0xFF800000 : f32 -// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) -// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref -// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 -// CHECKLOOP: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 -// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @pooling_max_padding +// CHECK: %[[PAD:.*]] = constant 0xFF800000 : f32 +// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: scf.for {{.*}} to %[[OX]] +// CHECK: scf.for {{.*}} to %[[OY]] +// CHECK: scf.for {{.*}} to %[[WX]] +// CHECK: scf.for {{.*}} to %[[WY]] +// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] +// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] +// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref +// CHECK: %[[IDX:.*]] = affine.max 
#[[$clampMinMap]](%[[IX]]) +// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 +// CHECK: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 +// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 +// CHECK: store %[[RES]], {{.*}} : memref // CHECKPARALLEL-LABEL: func @pooling_max_padding // CHECKPARALLEL: %[[PAD:.*]] = constant 0xFF800000 : f32 @@ -499,19 +498,19 @@ // CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref // CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref // CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) +// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] +// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] +// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) // CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 // CHECKPARALLEL: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: store %[[RES]], {{.*}} : memref func @pooling_max_padding_i32(%arg0: memref, %arg1: memref, @@ -520,26 +519,26 @@ memref, memref, memref return } -// CHECKLOOP-LABEL: func @pooling_max_padding_i32 -// CHECKLOOP: %[[PAD:.*]] = constant -2147483648 : i32 -// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) -// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref -// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 -// CHECKLOOP: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 -// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] 
: i32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECK-LABEL: func @pooling_max_padding_i32 +// CHECK: %[[PAD:.*]] = constant -2147483648 : i32 +// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: scf.for {{.*}} to %[[OX]] +// CHECK: scf.for {{.*}} to %[[OY]] +// CHECK: scf.for {{.*}} to %[[WX]] +// CHECK: scf.for {{.*}} to %[[WY]] +// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] +// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] +// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref +// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) +// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) +// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 +// CHECK: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 +// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 +// CHECK: store %[[RES]], {{.*}} : memref // CHECKPARALLEL-LABEL: func @pooling_max_padding_i32 // CHECKPARALLEL: %[[PAD:.*]] = constant -2147483648 : i32 @@ -547,19 +546,19 @@ // CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref // CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref // CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]]) +// CHECKPARALLEL: scf.for {{.*}} to %[[WX]] +// CHECKPARALLEL: scf.for {{.*}} to %[[WY]] +// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]] +// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]] +// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) // CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 // CHECKPARALLEL: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: store %[[RES]], {{.*}} : memref func @pooling_min(%arg0: memref, %arg1: memref, @@ -568,36 +567,36 @@ memref, memref, memref return } -// CHECKLOOP-LABEL: func @pooling_min -// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to 
%[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_min
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECK: memref.load {{.*}} : memref
+// CHECK: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
+// CHECK: %[[RES:.*]] = select %{{.*}},
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_min
// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
-// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECKPARALLEL: memref.load {{.*}} : memref
+// CHECKPARALLEL: memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
+// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}},
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

func @pooling_min_padding(%arg0: memref,
                          %arg1: memref,
@@ -606,26 +605,26 @@
  memref, memref, memref
  return
}

-// CHECKLOOP-LABEL: func @pooling_min_padding
-// CHECKLOOP: %[[PAD:.*]] = constant 0x7F800000 : f32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKLOOP: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_min_padding
+// CHECK: %[[PAD:.*]] = constant 0x7F800000 : f32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
+// CHECK: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_min_padding
// CHECKPARALLEL: %[[PAD:.*]] = constant 0x7F800000 : f32
@@ -633,19 +632,19 @@
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
// CHECKPARALLEL: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

func @pooling_min_padding_i32(%arg0: memref,
                              %arg1: memref,
@@ -654,26 +653,26 @@
  memref, memref, memref
  return
}

-// CHECKLOOP-LABEL: func @pooling_min_padding_i32
-// CHECKLOOP: %[[PAD:.*]] = constant 2147483647 : i32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKLOOP: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_min_padding_i32
+// CHECK: %[[PAD:.*]] = constant 2147483647 : i32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
+// CHECK: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
+// CHECK: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_min_padding_i32
// CHECKPARALLEL: %[[PAD:.*]] = constant 2147483647 : i32
@@ -681,19 +680,19 @@
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
// CHECKPARALLEL: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32
// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

func @pooling_sum(%arg0: memref,
                  %arg1: memref,
@@ -702,36 +701,36 @@
  memref, memref, memref
  return
}

-// CHECKLOOP-LABEL: func @pooling_sum
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_sum
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
+// CHECK: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
+// CHECK: %[[LHS:.*]] = memref.load {{.*}} : memref
+// CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_sum
// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]]
// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref
-// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: %[[LHS:.*]] = memref.load {{.*}} : memref
// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

func @pooling_sum_padding(%arg0: memref,
                          %arg1: memref,
@@ -740,25 +739,25 @@
  memref, memref, memref
  return
}

-// CHECKLOOP-LABEL: func @pooling_sum_padding
-// CHECKLOOP: %[[PAD:.*]] = constant 0.000000e+00 : f32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_sum_padding
+// CHECK: %[[PAD:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref
+// CHECK: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_sum_padding
// CHECKPARALLEL: %[[PAD:.*]] = constant 0.000000e+00 : f32
@@ -766,18 +765,18 @@
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref
// CHECKPARALLEL: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

func @pooling_sum_padding_i32(%arg0: memref,
                              %arg1: memref,
@@ -786,25 +785,25 @@
  memref, memref, memref
  return
}

-// CHECKLOOP-LABEL: func @pooling_sum_padding_i32
-// CHECKLOOP: %[[PAD:.*]] = constant 0 : i32
-// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
-// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
-// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
-// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
-// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
-// CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
-// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
-// CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
-// CHECKLOOP: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
-// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECK-LABEL: func @pooling_sum_padding_i32
+// CHECK: %[[PAD:.*]] = constant 0 : i32
+// CHECK: %[[WX:.*]] = memref.dim %arg1, %c0 : memref
+// CHECK: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
+// CHECK: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
+// CHECK: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
+// CHECK: scf.for {{.*}} to %[[OX]]
+// CHECK: scf.for {{.*}} to %[[OY]]
+// CHECK: scf.for {{.*}} to %[[WX]]
+// CHECK: scf.for {{.*}} to %[[WY]]
+// CHECK: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECK: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
+// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
+// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
+// CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
+// CHECK: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
+// CHECK: %[[RHS:.*]] = memref.load {{.*}} : memref
+// CHECK: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
+// CHECK: store %[[RES]], {{.*}} : memref

// CHECKPARALLEL-LABEL: func @pooling_sum_padding_i32
// CHECKPARALLEL: %[[PAD:.*]] = constant 0 : i32
@@ -812,18 +811,18 @@
// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref
// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref
// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
-// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: scf.parallel {{.*}} to (%[[OX]], %[[OY]])
+// CHECKPARALLEL: scf.for {{.*}} to %[[WX]]
+// CHECKPARALLEL: scf.for {{.*}} to %[[WY]]
+// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]]
+// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]]
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]])
// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]])
// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref
// CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32
-// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: %[[RHS:.*]] = memref.load {{.*}} : memref
// CHECKPARALLEL: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32
-// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref
+// CHECKPARALLEL: store %[[RES]], {{.*}} : memref

#accesses = [
  affine_map<(i, j, k) -> (i, j)>,
@@ -850,17 +849,17 @@
  }
  return
}

-// CHECKLOOP-LABEL: @generic_region
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref
-// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
-// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
-// CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32
-// CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
-// CHECKLOOP: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
+// CHECK-LABEL: @generic_region
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: scf.for %[[k:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref
+// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
+// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
+// CHECK: %[[d:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[e:.*]] = addf %[[c]], %[[d]] : f32
+// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
+// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref

// CHECKPARALLEL-LABEL: @generic_region
// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
@@ -905,21 +904,21 @@
  return
}

-// CHECKLOOP-LABEL: @generic_index_region
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
-// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
-// CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
-// CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index
-// CHECKLOOP: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32
-// CHECKLOOP: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32
-// CHECKLOOP: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32
-// CHECKLOOP: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
-// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
+// CHECK-LABEL: @generic_index_region
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: scf.for %[[k:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
+// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
+// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
+// CHECK: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32
+// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECK: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index
+// CHECK: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32
+// CHECK: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32
+// CHECK: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32
+// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
+// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]

// CHECKPARALLEL-LABEL: @generic_index_region
// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
@@ -961,13 +960,13 @@
  return
}

-// CHECKLOOP-LABEL: @generic_op_zero_rank
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][]
-// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
+// CHECK-LABEL: @generic_op_zero_rank
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][]
+// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]

// CHECKPARALLEL-LABEL: @generic_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
@@ -992,16 +991,16 @@
  return
}

-// CHECKLOOP-LABEL: @generic_index_op_zero_rank
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][
-// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
-// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
-// CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
-// CHECKLOOP: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
+// CHECK-LABEL: @generic_index_op_zero_rank
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: scf.for %[[j:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][
+// CHECK: %[[ij:.*]] = addi %[[i]], %[[j]] : index
+// CHECK: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
+// CHECK: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32
+// CHECK: store %[[result]], %[[ARG1]][%[[i]], %[[j]]]

// CHECKPARALLEL-LABEL: @generic_index_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
@@ -1037,14 +1036,14 @@
  }
  return
}

-// CHECKLOOP-LABEL: @generic_op_1D_reduce
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][]
-// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32
-// CHECKLOOP: store %[[c]], %[[ARG1]][]
+// CHECK-LABEL: @generic_op_1D_reduce
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
+// CHECK: %[[b:.*]] = memref.load %[[ARG1]][]
+// CHECK: %[[c:.*]] = addf %[[a]], %[[b]] : f32
+// CHECK: store %[[c]], %[[ARG1]][]

// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
@@ -1087,17 +1086,17 @@
  }
  return
}

-// CHECKLOOP-LABEL: @generic_index_op_1D_reduce
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
-// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][]
-// CHECKLOOP: %[[c:.*]] = memref.load %[[ARG2]][]
-// CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
-// CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]]
-// CHECKLOOP: store %[[e]], %[[ARG2]][]
+// CHECK-LABEL: @generic_index_op_1D_reduce
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
+// CHECK: %[[b:.*]] = memref.load %[[ARG1]][]
+// CHECK: %[[c:.*]] = memref.load %[[ARG2]][]
+// CHECK: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]]
+// CHECK: %[[e:.*]] = addf %[[a]], %[[d]]
+// CHECK: store %[[e]], %[[ARG2]][]

// CHECKPARALLEL-LABEL: @generic_index_op_1D_reduce
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
@@ -1126,11 +1125,11 @@
  }
  return
}

-// CHECKLOOP-LABEL: @generic_const_init
-// CHECKLOOP-SAME: %[[ARG0:.*]]: memref
-// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32
-// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: store %[[CONST]], %[[ARG0]]
+// CHECK-LABEL: @generic_const_init
+// CHECK-SAME: %[[ARG0:.*]]: memref
+// CHECK: %[[CONST:.*]] = constant 1.000000e+00 : f32
+// CHECK: scf.for %[[i:.*]] = {{.*}}
+// CHECK: store %[[CONST]], %[[ARG0]]

// CHECKPARALLEL-LABEL: @generic_const_init
// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref
@@ -1165,18 +1164,18 @@
  }
  return
}

-// CHECKLOOP-LABEL: @scalar_code
-// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP-NOT: scf.for
-// CHECKLOOP: memref.load %[[ARG0]][]
-// CHECKLOOP: memref.load %[[ARG1]][]
-// CHECKLOOP: scf.if
-// CHECKLOOP: scf.yield
-// CHECKLOOP: else
-// CHECKLOOP: scf.yield
-// CHECKLOOP: store %{{.*}}, %[[ARG2]][]
+// CHECK-LABEL: @scalar_code
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
+// CHECK-NOT: scf.for
+// CHECK: memref.load %[[ARG0]][]
+// CHECK: memref.load %[[ARG1]][]
+// CHECK: scf.if
+// CHECK: scf.yield
+// CHECK: else
+// CHECK: scf.yield
+// CHECK: store %{{.*}}, %[[ARG2]][]

// CHECKPARALLEL-LABEL: @scalar_code
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
@@ -1199,24 +1198,24 @@
  outs(%C : memref)
  return
}

-// CHECKLOOP-LABEL: @named_batch_matmul
-// CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref
-// CHECKLOOP: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref
-// CHECKLOOP: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref
-// CHECKLOOP: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref
-// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
-// CHECKLOOP: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
+// CHECK-LABEL: @named_batch_matmul
+// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref
+// CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref
+// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref
+// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref
+// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref
+// CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
+// CHECK: scf.for %[[m:.*]] = %{{.*}} to %[[M]]
+// CHECK: scf.for %[[n:.*]] = %{{.*}} to %[[N]]
+// CHECK: scf.for %[[k:.*]] = %{{.*}} to %[[K]]
+// CHECK: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref
+// CHECK: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref
+// CHECK: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
+// CHECK: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref

// CHECKPARALLEL-LABEL: @named_batch_matmul
// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref
@@ -1227,7 +1226,7 @@
// CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref
// CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref
// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
-// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]]
// CHECKPARALLEL: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref
// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref
// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
@@ -1242,23 +1241,23 @@
  return
}

-// CHECKLOOP-LABEL: @conv1d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
-// CHECKLOOP: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[b]]] : memref
+// CHECK-LABEL: @conv1d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
+// CHECK: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[b]]] : memref

// CHECKPARALLEL-LABEL: @conv1d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
@@ -1284,30 +1283,30 @@
  outs(%out: memref)
  return
}

-// CHECKLOOP-LABEL: @conv2d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref
-// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
-// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref
-// CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
-// CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref
-
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref
-
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref
+// CHECK-LABEL: @conv2d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref
+// CHECK: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
+// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref
+// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
+// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
+// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
+// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref
+
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref
+
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref

// CHECKPARALLEL-LABEL: @conv2d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
@@ -1338,36 +1337,36 @@
  return
}

-// CHECKLOOP-LABEL: @conv3d_no_symbols
-// CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
-// CHECKLOOP: %[[c2:.*]] = constant 2 : index
-// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
-// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref
-// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref
-// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
-// CHECKLOOP: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref
-// CHECKLOOP: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref
-// CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
-// CHECKLOOP: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
-// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
-// CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
-// CHECKLOOP: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
-// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref
-
-// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref
-// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref
-
-// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
-// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
-// CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref
+// CHECK-LABEL: @conv3d_no_symbols
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref
+// CHECK: %[[c2:.*]] = constant 2 : index
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[c1:.*]] = constant 1 : index
+// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref
+// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref
+// CHECK: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref
+// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref
+// CHECK: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref
+// CHECK: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref
+// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
+// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
+// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
+// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
+// CHECK: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
+// CHECK: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
+// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
+// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
+// CHECK: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
+// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref
+
+// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref
+// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref
+
+// CHECK: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
+// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
+// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref

// CHECKPARALLEL-LABEL: @conv3d_no_symbols
// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref