diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -25,7 +25,7 @@
 class AffineApplyOp;
 class AffineBound;
 class AffineValueMap;
-class IRRewriter;
+class RewriterBase;
 
 /// TODO: These should be renamed if they are on the mlir namespace.
 /// Ideally, they should go in a mlir::affine:: namespace.
@@ -381,13 +381,30 @@
 AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
                                       ValueRange values);
 
+/// Constructs an AffineApplyOp that applies `map` to `operands` after composing
+/// the map with the maps of any other AffineApplyOp supplying the operands,
+/// then immediately attempts to fold it. If folding results in a constant
+/// value, erases all created ops. The `map` must be a single-result affine map.
+OpFoldResult makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                           AffineMap map,
+                                           ArrayRef<OpFoldResult> operands);
+/// Variant of `makeComposedFoldedAffineApply` that applies to an expression.
+OpFoldResult makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                           AffineExpr expr,
+                                           ArrayRef<OpFoldResult> operands);
+
+/// Returns an AffineMinOp obtained by composing `map` and `operands` with
+/// AffineApplyOps supplying those operands.
+Value makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
+                            ValueRange operands);
+
 /// Returns the values obtained by applying `map` to the list of values.
 SmallVector<Value, 4> applyMapToValues(OpBuilder &b, Location loc,
                                        AffineMap map, ValueRange values);
 
 /// Returns the values obtained by applying `map` to the list of values, which
 /// may be known constants.
-SmallVector<OpFoldResult> applyMapToValues(IRRewriter &b, Location loc,
+SmallVector<OpFoldResult> applyMapToValues(RewriterBase &b, Location loc,
                                            AffineMap map,
                                            ArrayRef<OpFoldResult> values);
 
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -14,6 +14,7 @@
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/Matchers.h"
+#include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/SmallBitVector.h"
@@ -588,7 +589,7 @@
 /// AffineSymbolExpr@[pos - dims.size()] is replaced.
 /// Mutate `map`,`dims` and `syms` in place as follows:
 /// 1. `dims` and `syms` are only appended to.
-/// 2. `map` dim and symbols are gradually shifted to higer positions.
+/// 2. `map` dim and symbols are gradually shifted to higher positions.
 /// 3. Old `dim` and `sym` entries are replaced by nullptr
 /// This avoids the need for any bookkeeping.
 static LogicalResult replaceDimOrSym(AffineMap *map,
@@ -705,6 +706,68 @@
   }
 }
 
+/// Given a list of `OpFoldResult`, build the necessary operations to populate
+/// `actualValues` with values produced by operations. In particular, for any
+/// attribute-typed element in `values`, call the constant materializer
+/// associated with the Affine dialect to produce an operation.
+static void materializeConstants(OpBuilder &b, Location loc,
+                                 ArrayRef<OpFoldResult> values,
+                                 SmallVectorImpl<Operation *> &constants,
+                                 SmallVectorImpl<Value> &actualValues) {
+  actualValues.reserve(values.size());
+  auto *dialect = b.getContext()->getLoadedDialect<AffineDialect>();
+  for (OpFoldResult ofr : values) {
+    if (auto value = ofr.dyn_cast<Value>()) {
+      actualValues.push_back(value);
+      continue;
+    }
+    constants.push_back(dialect->materializeConstant(b, ofr.get<Attribute>(),
+                                                     b.getIndexType(), loc));
+    actualValues.push_back(constants.back()->getResult(0));
+  }
+}
+
+/// Create an operation of the type provided as a template argument and attempt
+/// to fold it immediately. The operation is expected to have a builder taking
+/// arbitrary `leadingArguments`, followed by a list of Value-typed `operands`.
+/// The operation is also expected to always produce a single result. Return an
+/// `OpFoldResult` containing the Attribute representing the folded constant if
+/// complete folding was possible, and a Value produced by the created
+/// operation otherwise.
+template <typename OpTy, typename... Args>
+static std::enable_if_t<OpTy::template hasTrait<OpTrait::OneResult>(),
+                        OpFoldResult>
+createOrFold(RewriterBase &b, Location loc, ValueRange operands,
+             Args &&...leadingArguments) {
+  // Identify the constant operands and extract their values as attributes.
+  // Note that we cannot use the original values directly because the list of
+  // operands may have changed due to canonicalization and composition.
+  SmallVector<Attribute> constantOperands;
+  constantOperands.reserve(operands.size());
+  for (Value operand : operands) {
+    IntegerAttr attr;
+    if (matchPattern(operand, m_Constant(&attr)))
+      constantOperands.push_back(attr);
+    else
+      constantOperands.push_back(nullptr);
+  }
+
+  // Create the operation and immediately attempt to fold it. On success,
+  // delete the operation and prepare the (unmaterialized) value for being
+  // returned. On failure, return the operation result value.
+  // TODO: arguably, the main folder (createOrFold) API should support this use
+  // case instead of indiscriminately materializing constants.
+  OpTy op =
+      b.create<OpTy>(loc, std::forward<Args>(leadingArguments)..., operands);
+  SmallVector<OpFoldResult> foldResults;
+  if (succeeded(op->fold(constantOperands, foldResults)) &&
+      !foldResults.empty()) {
+    b.eraseOp(op);
+    return foldResults.front();
+  }
+  return op->getResult(0);
+}
+
 AffineApplyOp mlir::makeComposedAffineApply(OpBuilder &b, Location loc,
                                             AffineMap map,
                                             ValueRange operands) {
@@ -722,6 +785,96 @@
       values);
 }
 
+OpFoldResult
+mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                    AffineMap map,
+                                    ArrayRef<OpFoldResult> operands) {
+  assert(map.getNumResults() == 1 && "building affine.apply with !=1 result");
+
+  SmallVector<Operation *> constants;
+  SmallVector<Value> actualValues;
+  materializeConstants(b, loc, operands, constants, actualValues);
+  composeAffineMapAndOperands(&map, &actualValues);
+  return createOrFold<AffineApplyOp>(b, loc, actualValues, map);
+}
+
+OpFoldResult
+mlir::makeComposedFoldedAffineApply(RewriterBase &b, Location loc,
+                                    AffineExpr expr,
+                                    ArrayRef<OpFoldResult> operands) {
+  return makeComposedFoldedAffineApply(
+      b, loc, AffineMap::inferFromExprList(ArrayRef<AffineExpr>{expr}).front(),
+      operands);
+}
+
+Value mlir::makeComposedAffineMin(OpBuilder &b, Location loc, AffineMap map,
+                                  ValueRange operands) {
+  // 1. Compose maps and operands for individual expressions. Store results as
+  // single-expression maps in order to keep track of the numbers of dimensions
+  // and symbols used in each.
+  SmallVector<SmallVector<Value>, 2> exprOperands;
+  SmallVector<AffineMap> exprs;
+  exprOperands.reserve(map.getNumResults());
+  exprs.reserve(map.getNumResults());
+  for (unsigned i : llvm::seq<unsigned>(0, map.getNumResults())) {
+    AffineMap &exprMap = exprs.emplace_back(map.getSubMap({i}));
+    fullyComposeAffineMapAndOperands(
+        &exprMap, &exprOperands.emplace_back(llvm::to_vector(operands)));
+  }
+
+  // 2. Collect unique dimension and symbol operands used across individual
+  // expressions. We don't expect these sets to overlap: composition implies
+  // canonicalization, which classifies an operand as a dimension or a symbol
+  // based only on where its value is defined, so the same operand is
+  // classified identically across all expressions.
+  SetVector<Value> normalizedDimOperandSet;
+  SetVector<Value> normalizedSymbolOperandSet;
+  for (unsigned i : llvm::seq<unsigned>(0, map.getNumResults())) {
+    auto *it = exprOperands[i].begin() + exprs[i].getNumDims();
+    normalizedDimOperandSet.insert(exprOperands[i].begin(), it);
+    normalizedSymbolOperandSet.insert(it, exprOperands[i].end());
+  }
+
+  // 3. Create a single list of unique operands containing dimensions followed
+  // by symbols.
+  unsigned numDims = normalizedDimOperandSet.size();
+  unsigned numSymbols = normalizedSymbolOperandSet.size();
+  SmallVector<Value> normalizedOperands =
+      llvm::to_vector(normalizedDimOperandSet);
+  llvm::append_range(normalizedOperands, normalizedSymbolOperandSet);
+
+  // 4. For each composed expression, remap its dimensions and symbols to the
+  // dimensions and symbols that correspond to the positions of the operands
+  // specific to this expression in the normalized operand list.
+  SmallVector<AffineExpr> normalizedExprs;
+  normalizedExprs.reserve(exprs.size());
+  for (unsigned i : llvm::seq<unsigned>(0, exprs.size())) {
+    SmallVector<AffineExpr> dimReplacements;
+    SmallVector<AffineExpr> symReplacements;
+    for (const auto &en : llvm::enumerate(exprOperands[i])) {
+      Value operand = en.value();
+      unsigned position = std::distance(
+          normalizedOperands.begin(), llvm::find(normalizedOperands, operand));
+      AffineExpr replacement = position < numDims
+                                   ? b.getAffineDimExpr(position)
+                                   : b.getAffineSymbolExpr(position - numDims);
+      if (en.index() < exprs[i].getNumDims())
+        dimReplacements.push_back(replacement);
+      else
+        symReplacements.push_back(replacement);
+    }
+    normalizedExprs.push_back(exprs[i].getResult(0).replaceDimsAndSymbols(
+        dimReplacements, symReplacements));
+  }
+
+  // 5. Construct an affine.min with normalized expressions and operands. Note
+  // that it may even fold to a constant thanks to normalization.
+  auto normalizedMap =
+      AffineMap::get(numDims, numSymbols, normalizedExprs, b.getContext());
+  return b.createOrFold<AffineMinOp>(loc, b.getIndexType(), normalizedMap,
+                                     normalizedOperands);
+}
+
 /// Fully compose map with operands and canonicalize the result.
 /// Return the `createOrFold`'ed AffineApply op.
 static Value createFoldedComposedAffineApply(OpBuilder &b, Location loc,
@@ -749,23 +902,13 @@
 }
 
 SmallVector<OpFoldResult>
-mlir::applyMapToValues(IRRewriter &b, Location loc, AffineMap map,
+mlir::applyMapToValues(RewriterBase &b, Location loc, AffineMap map,
                        ArrayRef<OpFoldResult> values) {
   // Materialize constants and keep track of produced operations so we can clean
   // them up later.
   SmallVector<Operation *> constants;
   SmallVector<Value> actualValues;
-  actualValues.reserve(values.size());
-  auto *dialect = b.getContext()->getLoadedDialect<AffineDialect>();
-  for (OpFoldResult ofr : values) {
-    if (auto value = ofr.dyn_cast<Value>()) {
-      actualValues.push_back(value);
-      continue;
-    }
-    constants.push_back(dialect->materializeConstant(b, ofr.get<Attribute>(),
-                                                     b.getIndexType(), loc));
-    actualValues.push_back(constants.back()->getResult(0));
-  }
+  materializeConstants(b, loc, values, constants, actualValues);
 
   // Compose, fold and construct maps for each result independently because they
   // may simplify more effectively.
@@ -777,35 +920,9 @@
     SmallVector<Value> operands = actualValues;
     fullyComposeAffineMapAndOperands(&submap, &operands);
     canonicalizeMapAndOperands(&submap, &operands);
-
-    // Identify the constant operands and extract their values as attributes.
-    // Note that we cannot use the original values directly because the list of
-    // operands may have changed due to canonicalization and composition.
-    SmallVector<Attribute> constantOperands;
-    constantOperands.reserve(operands.size());
-    for (Value operand : operands) {
-      IntegerAttr attr;
-      if (matchPattern(operand, m_Constant(&attr)))
-        constantOperands.push_back(attr);
-      else
-        constantOperands.push_back(nullptr);
-    }
-
-    // Create an apply operation and immediately attempt to fold it. On sucess,
-    // delete the operation and prepare the (unmaterialized) value for being
-    // returned. On failure, return the function result.
-    // TODO: arguably, the main folder (createOrFold) API should support this
-    // use case instead of indiscriminately materializing constants.
-    auto apply = b.create<AffineApplyOp>(loc, submap, operands);
-    SmallVector<OpFoldResult> foldResult;
-    if (succeeded(apply->fold(constantOperands, foldResult))) {
-      assert(foldResult.size() == 1 && "expected single-result map");
-      b.eraseOp(apply);
-      results.push_back(foldResult.front());
-    } else {
-      results.push_back(apply.getResult());
+    results.push_back(createOrFold<AffineApplyOp>(b, loc, operands, submap));
+    if (!results.back().is<Attribute>())
       foldedAll = false;
-    }
   }
 
   // If the entire map could be folded, remove the constants that were used in
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -294,11 +294,11 @@
     return emitSilenceableError() << "could not generate tile size computation";
   }
 
+  AffineExpr s0 = builder.getAffineSymbolExpr(0);
+  AffineExpr s1 = builder.getAffineSymbolExpr(1);
   Operation *splitPoint =
-      builder
-          .createOrFold<arith::MulIOp>(target.getLoc(), spec->lowTileSize,
-                                       spec->lowTripCount)
-          .getDefiningOp();
+      makeComposedAffineApply(builder, target.getLoc(), s0 * s1,
+                              {spec->lowTileSize, spec->lowTripCount});
   Operation *lowTileSize = spec->lowTileSize.getDefiningOp();
   Operation *highTileSize = spec->highTileSize.getDefiningOp();
   assert(lowTileSize && highTileSize && splitPoint &&
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Split.cpp b/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Split.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
 
 #include "llvm/ADT/STLExtras.h"
 
@@ -24,7 +25,7 @@
 /// generated; otherwise, the "low" part with no offset is generated. Note that
 /// `operands` are not necessarily the actual operands of `op`.
 static SmallVector<Value>
-getOperandSlices(ImplicitLocOpBuilder &builder, LinalgOp op,
+getOperandSlices(RewriterBase &b, Location loc, LinalgOp op,
                  ValueRange splitIterationSpace, ValueRange operands,
                  unsigned dimension, Value offset = nullptr) {
   SmallVector<Value> slices;
@@ -42,20 +43,24 @@
       continue;
     }
 
-    SmallVector<Value, 4> sizes =
-        applyMapToValues(builder, op.getLoc(), indexing, splitIterationSpace);
-    SmallVector<OpFoldResult> offsets(type.getRank(), builder.getIndexAttr(0));
-    SmallVector<OpFoldResult> strides(type.getRank(), builder.getIndexAttr(1));
+    SmallVector<OpFoldResult> sizes;
+    sizes.reserve(indexing.getNumResults());
+    for (AffineExpr dimIndexing : indexing.getResults()) {
+      sizes.push_back(makeComposedFoldedAffineApply(
+          b, loc, dimIndexing,
+          getAsOpFoldResult(llvm::to_vector(splitIterationSpace))));
+    }
+    SmallVector<OpFoldResult> offsets(type.getRank(), b.getIndexAttr(0));
+    SmallVector<OpFoldResult> strides(type.getRank(), b.getIndexAttr(1));
 
     if (offset) {
      offsets[dimension] = offset;
-      IRRewriter rewriter(builder);
-      offsets = applyMapToValues(rewriter, builder.getLoc(), indexing, offsets);
+      offsets = applyMapToValues(b, loc, indexing, offsets);
     }
 
-    slices.push_back(createSlice(builder, op.getLoc(),
+    slices.push_back(createSlice(b, loc,
                                  operands[opOperand->getOperandNumber()],
-                                 offsets, getAsOpFoldResult(sizes), strides));
+                                 offsets, sizes, strides));
   }
 
   return slices;
@@ -70,7 +75,7 @@
 /// Returns the split-out op as well as the output operand values updated with
 /// the partial results produced by this op through `results`.
 static LinalgOp createSplitPart(
-    ImplicitLocOpBuilder &builder, LinalgOp op, ValueRange resultOperands,
+    RewriterBase &b, Location loc, LinalgOp op, ValueRange resultOperands,
     llvm::MutableArrayRef<Value> splitIterationSpace, unsigned dimension,
     Value size, SmallVectorImpl<Value> &results, Value offset = nullptr) {
   splitIterationSpace[dimension] = size;
@@ -78,12 +83,11 @@
       llvm::map_range(op.getInputOperands(),
                       [](OpOperand *opOperand) { return opOperand->get(); }));
   llvm::append_range(operands, resultOperands);
-  operands = getOperandSlices(builder, op, splitIterationSpace, operands,
+  operands = getOperandSlices(b, loc, op, splitIterationSpace, operands,
                               dimension, offset);
-  Operation *part = op.clone(builder, op.getLoc(),
-                             getTensorOutputTypes(op, operands), operands);
-  results = insertSlicesBack(builder, builder.getLoc(), op, operands,
-                             part->getResults());
+  Operation *part =
+      op.clone(b, loc, getTensorOutputTypes(op, operands), operands);
+  results = insertSlicesBack(b, loc, op, operands, part->getResults());
   return cast<LinalgOp>(part);
 }
@@ -95,21 +99,21 @@
     return std::make_pair(op, LinalgOp());
 
   // Compute the iteration space size as values.
-  ImplicitLocOpBuilder builder(op.getLoc(), rewriter);
   SmallVector<Value, 4> allShapes =
-      op.createFlatListOfOperandDims(builder, op.getLoc());
+      op.createFlatListOfOperandDims(rewriter, op.getLoc());
   AffineMap shapesToLoops = op.getShapesToLoopsMap();
   SmallVector<Value, 4> iterationSpaceShapes =
-      applyMapToValues(builder, op.getLoc(), shapesToLoops, allShapes);
+      applyMapToValues(rewriter, op.getLoc(), shapesToLoops, allShapes);
 
   // Update the iteration space to have `splitPoint` as the size of `dimension`
   // and use it to slice operands and results for a new, smaller instance of the
   // `op`. Adjust the size if necessary to prevent overflows. Insert the partial
   // results back.
-  Value splitPointValue = materializeOpFoldResult(builder, splitPoint);
-  splitPointValue = builder.createOrFold<AffineMinOp>(
-      builder.getIndexType(),
-      AffineMap::getMultiDimIdentityMap(/*numDims=*/2, builder.getContext()),
+  ImplicitLocOpBuilder implicit(op.getLoc(), rewriter);
+  Value splitPointValue = materializeOpFoldResult(implicit, splitPoint);
+  splitPointValue = makeComposedAffineMin(
+      rewriter, op.getLoc(),
+      AffineMap::getMultiDimIdentityMap(/*numDims=*/2, rewriter.getContext()),
       ValueRange({splitPointValue, iterationSpaceShapes[dimension]}));
 
   SmallVector<Value> splitIterationSpace =
       llvm::to_vector(iterationSpaceShapes);
@@ -117,23 +121,23 @@
       llvm::map_range(op.getOutputOperands(),
                       [](OpOperand *opOperand) { return opOperand->get(); }));
   SmallVector<Value> firstResults;
-  LinalgOp first =
-      createSplitPart(builder, op, originalResults, splitIterationSpace,
-                      dimension, splitPointValue, firstResults);
+  LinalgOp first = createSplitPart(rewriter, op.getLoc(), op, originalResults,
+                                   splitIterationSpace, dimension,
+                                   splitPointValue, firstResults);
 
   // Update the iteration space to cover the remaining part of the original
   // space, then create another instance of the `op` in that space. The size of
   // the remaining part may become zero, but is never negative because of the
   // adjustment above.
-  AffineExpr d0 = builder.getAffineDimExpr(0);
-  AffineExpr d1 = builder.getAffineDimExpr(1);
+  AffineExpr d0 = rewriter.getAffineDimExpr(0);
+  AffineExpr d1 = rewriter.getAffineDimExpr(1);
   SmallVector<Value, 4> remainingSizes = applyMapToValues(
-      builder, op.getLoc(), AffineMap::inferFromExprList({d0 - d1}).front(),
-      {iterationSpaceShapes[dimension], splitPointValue});
+      rewriter, op.getLoc(), AffineMap::inferFromExprList({d0 - d1}).front(),
+      ValueRange{iterationSpaceShapes[dimension], splitPointValue});
   SmallVector<Value> secondResults;
-  LinalgOp second =
-      createSplitPart(builder, op, firstResults, splitIterationSpace, dimension,
-                      remainingSizes.front(), secondResults, splitPointValue);
+  LinalgOp second = createSplitPart(
+      rewriter, op.getLoc(), op, firstResults, splitIterationSpace, dimension,
+      remainingSizes.front(), secondResults, splitPointValue);
 
   // Fixup the linalg.index results in the second part.
   SmallVector<Value> ivAdditions;
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -28,9 +28,6 @@
 
 func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32
 
-// CHECK-DAG: #[[$MAP_MIN_4_2:.+]] = affine_map<(d0) -> (-d0 + 4, 2)>
-// CHECK-DAG: #[[$MAP_MIN_16_8:.+]] = affine_map<(d0) -> (-d0 + 16, 8)>
-
 // CHECK-LABEL: @two_d
 // CHECK-SAME: %[[IN:.+]]: tensor<10x34xf32>, %[[OUT:.+]]: tensor<10x34xf32>
 func.func @two_d(%arg0: tensor<10x34xf32>,
@@ -54,35 +51,27 @@
   // respectively, and in this order.
   // Check the full code for the first quadrant, the data flow for the second
   // quadrant and only the overall code structure for the remaining quadrants.
-  //
-  // TODO: unfortunately, the canonicalization is insufficiently powerful to
-  // remove the affine min for sizes, leading to dynamic sizes even when tiling
-  // statically-shaped operation with constant tile sizes.
+  // The canonicalizer is able to recover the static shapes of the
+  // linalg.generic instances; use those to differentiate the quadrants.
   // CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
   // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
-  // CHECK:   %[[SZ1:.+]] = affine.min #[[$MAP_MIN_4_2]](%[[I1]])
-  // CHECK:   %[[INSLICE_1:.+]] = tensor.extract_slice %[[IN]][%[[I1]], 0] [%[[SZ1]], 34] [1, 1]
-  // CHECK:   %[[SZ2:.+]] = affine.min #[[$MAP_MIN_4_2]](%[[I1]])
-  // CHECK:   %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [%[[SZ2]], 34] [1, 1]
+  // CHECK:   %[[INSLICE_1:.+]] = tensor.extract_slice %[[IN]][%[[I1]], 0] [2, 34] [1, 1]
+  // CHECK:   %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
 
-  // CHECK:   %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [%[[SZ1]], 16] [1, 1]
+  // CHECK:   %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
   // CHECK:   %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-  // CHECK:     %[[SZ3:.+]] = affine.min #[[$MAP_MIN_16_8]](%[[I2]])
-  // CHECK:     %[[INSLICE_2:.+]] = tensor.extract_slice %[[INSLICE_1]][0, %[[I2]]] [%[[SZ1]], %[[SZ3]]] [1, 1]
-  // CHECK:     %[[SZ4:.+]] = tensor.dim %[[ITERARG_2]]
-  // CHECK:     %[[SZ5:.+]] = affine.min #[[$MAP_MIN_16_8]](%[[I2]])
-  // CHECK:     %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [%[[SZ4]], %[[SZ5]]] [1, 1]
-
-  // CHECK:     %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<?x?xf32>) outs(%[[OUTSLICE_2]] : tensor<?x?xf32>)
+  // CHECK:     %[[INSLICE_2:.+]] = tensor.extract_slice %[[INSLICE_1]][0, %[[I2]]] [2, 8] [1, 1]
+  // CHECK:     %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
+  // CHECK:     %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
   // CHECK:     %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
   // CHECK:     scf.yield %[[RESPARTIAL]]
 
-  // CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %[[LOOPRES]] into %[[OUTSLICE_1]][0, 0] [%[[SZ1]], 16] [1, 1]
-  // CHECK:   %[[OUTSLICE_3:.+]] = tensor.extract_slice %[[INSERTED]][0, 16] [%[[SZ1]], 18] [1, 1]
+  // CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %[[LOOPRES]] into %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
+  // CHECK:   %[[OUTSLICE_3:.+]] = tensor.extract_slice %[[INSERTED]][0, 16] [2, 18] [1, 1]
   // CHECK: scf.for %{{.*}} iter_args(%{{.*}} = %[[OUTSLICE_3]])
   // CHECK-COUNT-2: tensor.extract_slice
-  // CHECK: linalg.generic
+  // CHECK: linalg.generic {{.*}} ins(%{{.*}} : tensor<2x9xf32>)
   // CHECK: tensor.insert_slice
   // CHECK: scf.yield
   // CHECK: %[[INSERTED_2:.+]] = tensor.insert_slice %{{.*}} into %[[INSERTED]]
@@ -95,14 +84,14 @@
   // CHECK-COUNT-3: tensor.extract_slice
   // CHECK: scf.for
   // CHECK-COUNT-2: tensor.extract_slice
-  // CHECK: linalg.generic
+  // CHECK: linalg.generic {{.*}} ins(%{{.*}} : tensor<3x8xf32>)
   // CHECK: tensor.insert_slice
   // CHECK: scf.yield
   // CHECK: tensor.insert_slice
   // CHECK: tensor.extract_slice
   // CHECK: scf.for
   // CHECK-COUNT-2: tensor.extract_slice
-  // CHECK: linalg.generic
+  // CHECK: linalg.generic {{.*}} ins(%{{.*}} : tensor<3x9xf32>)
   // CHECK: tensor.insert_slice
   // CHECK: scf.yield
   // CHECK-COUNT-2: tensor.insert_slice
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file -verify-diagnostics | FileCheck %s
+// RUN: mlir-opt %s --test-transform-dialect-interpreter --canonicalize --split-input-file -verify-diagnostics | FileCheck %s --check-prefix=CANON
 
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
@@ -59,6 +60,8 @@
 
 // CHECK-LABEL: @one_d_static_overflow
 // CHECK-SAME:  %[[IN:.+]]: tensor<10xf32>, %[[OUT:.+]]: tensor<10xf32>
+// CANON-LABEL: @one_d_static_overflow
+// CANON-SAME:  %[[IN:.+]]: tensor<10xf32>, %[[OUT:.+]]: tensor<10xf32>
 func.func @one_d_static_overflow(%arg0: tensor<10xf32>, %arg1: tensor<10xf32>) -> tensor<10xf32> {
   // CHECK: %[[IN_SLICE_LOW:.+]] = tensor.extract_slice %[[IN]][0] [10] [1] : tensor<10xf32> to tensor<10xf32>
   // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [10] [1] : tensor<10xf32> to tensor<10xf32>
@@ -69,6 +72,16 @@
   // CHECK: func.call @elem
   // CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [10] [1]
   //
+  // Due to overflow, the first part of the split computes everything and the
+  // insert/extract slices are folded away by the canonicalizer.
+  // CANON: %[[RES_PARTIAL:.+]] = linalg.generic
+  // CANON: ins(%[[IN]]
+  // CANON: outs(%[[OUT]]
+  // CANON: linalg.index 0
+  // CANON: func.call @elem
+  // The second part operates on zero-sized slices that are not currently
+  // folded away.
+  //
   // CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN]][10] [0] [1] : tensor<10xf32> to tensor<0xf32>
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][10] [0] [1] : tensor<10xf32> to tensor<0xf32>
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
@@ -148,7 +161,8 @@
   } ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
-    linalg.yield %3 : f32
+    %5 = arith.addf %3, %4 : f32
+    linalg.yield %5 : f32
   } -> tensor<100xf32>
   return %1 : tensor<100xf32>
 }
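---
Usage sketch (illustrative only, not part of the patch): how the two new entry
points are meant to be called. The wrapper function `exampleUsage` and its
`a`/`b` operands are hypothetical; `rewriter` and `loc` stand for whatever
RewriterBase and Location are in scope, e.g. inside a rewrite pattern, and the
constant operands 10 and 4 are arbitrary.

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include <cassert>

using namespace mlir;

// Hypothetical helper exercising the new APIs; `a` and `b` are index-typed
// values such as loop bounds.
static void exampleUsage(RewriterBase &rewriter, Location loc, Value a,
                         Value b) {
  // Compose-and-fold an affine.apply: with two constant operands, s0 + s1 is
  // expected to fold to an Attribute (here holding 14), leaving no
  // affine.apply op in the IR.
  AffineExpr s0 = rewriter.getAffineSymbolExpr(0);
  AffineExpr s1 = rewriter.getAffineSymbolExpr(1);
  SmallVector<OpFoldResult> cst = {rewriter.getIndexAttr(10),
                                   rewriter.getIndexAttr(4)};
  OpFoldResult folded =
      makeComposedFoldedAffineApply(rewriter, loc, s0 + s1, cst);
  assert(folded.is<Attribute>() && "constant operands fold completely");
  (void)folded;

  // Composed affine.min: the 2-D identity map yields min(a, b); affine.apply
  // ops defining `a` or `b` are composed into the min map before the op is
  // created, which may enable folding to a constant.
  AffineMap minMap =
      AffineMap::getMultiDimIdentityMap(/*numDims=*/2, rewriter.getContext());
  Value bounded =
      makeComposedAffineMin(rewriter, loc, minMap, ValueRange{a, b});
  (void)bounded;
}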