diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -189,17 +189,15 @@
 FailureOr<TiledLinalgOp> tileLinalgOp(RewriterBase &b, LinalgOp op,
                                       const LinalgTilingOptions &options);
+/// Try to peel and canonicalize loop `op` and return the new result.
+// TODO: Add support for scf.parallel and affine.for loops.
+SmallVector<Value> peelLoop(RewriterBase &rewriter, Operation *op);
 /// Peel and canonicalize 'loops'.
 void peelLoops(RewriterBase &rewriter, ArrayRef<scf::ForOp> loops);
-/// Peel the loops of a TiledLinalgOp.
-void peelTiledLinalgOp(RewriterBase &rewriter, TiledLinalgOp &res,
-                       ArrayRef<int64_t> peeledLoops,
-                       LinalgTilingLoopType loopType);
-
 /// Interchange the `iterator_types` and `iterator_maps` dimensions and adapts
-/// the index accesses of `op`. This is an in-place transformation controlled by
-/// `interchangeVector`. An empty vector is interpreted as the identity
+/// the index accesses of `op`. This is an in-place transformation controlled
+/// by `interchangeVector`. An empty vector is interpreted as the identity
 /// permutation and the transformation returns early.
 ///
 /// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed with
@@ -232,10 +230,10 @@
 using DeallocBufferCallbackFn =
     std::function<LogicalResult(OpBuilder &b, Value buffer)>;
-/// Callback function type used to insert copy from original subview to subview
-/// of the promoted region for the read operands/subview of promoted region to
-/// original subview for the results. The copy has to happen from `src` to
-/// `dst`.
+/// Callback function type used to insert copy from original subview to
+/// subview of the promoted region for the read operands/subview of promoted
+/// region to original subview for the results. The copy has to happen from
+/// `src` to `dst`.
 using CopyCallbackFn =
     std::function<LogicalResult(OpBuilder &b, Value src, Value dst)>;
@@ -247,12 +245,11 @@
     operandsToPromote->insert(operands.begin(), operands.end());
     return *this;
   }
-  /// If ith element of `useFullTiles` is true the full view should be used for
-  /// the promoted buffer of the ith operand in `operandsToPromote`. Otherwise
-  /// the partial view will be used.
-  /// The decision is defaulted to `useFullTileBuffersDefault` when
-  /// `useFullTileBuffers` is None and for operands missing from
-  /// `useFullTileBuffers`.
+  /// If ith element of `useFullTiles` is true the full view should be used
+  /// for the promoted buffer of the ith operand in `operandsToPromote`.
+  /// Otherwise the partial view will be used. The decision is defaulted to
+  /// `useFullTileBuffersDefault` when `useFullTileBuffers` is None and for
+  /// operands missing from `useFullTileBuffers`.
   Optional<llvm::SmallBitVector> useFullTileBuffers = None;
   LinalgPromotionOptions &setUseFullTileBuffers(ArrayRef<bool> useFullTiles) {
     unsigned size = useFullTiles.size();
@@ -262,8 +259,8 @@
     useFullTileBuffers = tmp;
     return *this;
   }
-  /// If true all operands unspecified by `useFullTileBuffers` will use the full
-  /// view, otherwise the partial view.
+  /// If true all operands unspecified by `useFullTileBuffers` will use the
+  /// full view, otherwise the partial view.
   bool useFullTileBuffersDefault = false;
   LinalgPromotionOptions &setUseFullTileBuffersByDefault(bool use) {
     useFullTileBuffersDefault = use;
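For reference, the promotion knobs above compose through chained setters; the following is a hedged sketch (not part of this patch) of building options equivalent to the transform-dialect attributes `operands_to_promote = [0, 2]`, `force_full_tiles = [false, false]`, and `use_full_tiles_by_default` that the promotion_options.mlir test updated later in this patch exercises:

```cpp
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

using namespace mlir::linalg;

// Sketch only: promote operands 0 and 2, force partial views for those two,
// and default any unspecified full-tile decision to the full view.
static LinalgPromotionOptions makePromotionOptions() {
  LinalgPromotionOptions options;
  options.setOperandsToPromote({0, 2})
      .setUseFullTileBuffers({false, false})
      .setUseFullTileBuffersByDefault(true);
  return options;
}
```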
@@ -306,10 +303,10 @@
 };

 /// Create a new buffer using the `allocationFn` provided. The size of this
-/// buffer is the smallest constant bounding size along each dimension that can
-/// be computed for the size of the result of `subView`. Returns the allocated
-/// buffer as `fullLocalView` and the view that matches the size of the result
-/// of subview operation as `partialLocalView`.
+/// buffer is the smallest constant bounding size along each dimension that
+/// can be computed for the size of the result of `subView`. Returns the
+/// allocated buffer as `fullLocalView` and the view that matches the size of
+/// the result of subview operation as `partialLocalView`.
 struct PromotionInfo {
   Value fullLocalView;
   Value partialLocalView;
@@ -321,7 +318,8 @@
 /// Promote the `subViews` into a new buffer allocated at the insertion point
 /// `b`. Promotion occurs in 3 steps:
-/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary).
+/// 1. Create a new buffer for a full tile (i.e. not clipped at the
+/// boundary).
 /// 2. Take a full view on the buffer.
 /// 3. Take a partial slice of the full view in step 2. and copy into it.
 ///
@@ -369,12 +367,12 @@
 /// Creates a number of ranges equal to the number of non-zero in `tileSizes`.
 /// One for each loop of the LinalgOp that is tiled. The `tileSizes` argument
 /// has one entry per surrounding loop. It uses zero as the convention that a
-/// particular loop is not tiled. This convention simplifies implementations by
-/// avoiding affine map manipulations.
-/// The returned ranges correspond to the loop ranges, in the proper order, that
-/// are tiled and for which new loops will be created. Also the function returns
-/// a map from loop indices of the LinalgOp to the corresponding non-empty range
-/// indices of newly created loops.
+/// particular loop is not tiled. This convention simplifies implementations
+/// by avoiding affine map manipulations. The returned ranges correspond to
+/// the loop ranges, in the proper order, that are tiled and for which new
+/// loops will be created. Also the function returns a map from loop indices
+/// of the LinalgOp to the corresponding non-empty range indices of newly
+/// created loops.
 using LoopIndexToRangeIndexMap = DenseMap<int, int>;
 std::tuple<SmallVector<Range, 4>, LoopIndexToRangeIndexMap>
 makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map,
@@ -392,9 +390,9 @@
 };

 /// Emits the IR computing the multi-sized tiling specification with two tile
-/// sizes not exceeding `targetSize`, each divisible by `sizeDivisor`, such that
-/// there exist numbers of tiles with these sizes that fully cover the given
-/// iteration space `dimension` of the structured `op`.
+/// sizes not exceeding `targetSize`, each divisible by `sizeDivisor`, such
+/// that there exist numbers of tiles with these sizes that fully cover the
+/// given iteration space `dimension` of the structured `op`.
 ///
 /// The computation is as follows:
 ///
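The closed-form computation itself falls outside this hunk's context, but the contract stated above fully pins down what a valid answer must satisfy. The following standalone C++ sketch (illustrative only; none of these names are MLIR API) checks that contract by brute force: both tile sizes are divisible by `sizeDivisor`, neither exceeds `targetSize`, and the chosen tile counts cover the trip count exactly.

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Illustrative companion to the documented contract; the real entry point
// computes a closed form and emits IR so it also works for dynamic trip
// counts, instead of searching like this.
struct MultiSizeSpec {
  int64_t lowSize, highSize, lowCount, highCount;
};

static std::optional<MultiSizeSpec>
findMultiSizeSpec(int64_t tripCount, int64_t targetSize, int64_t sizeDivisor) {
  for (int64_t high = targetSize - targetSize % sizeDivisor;
       high >= sizeDivisor; high -= sizeDivisor)
    for (int64_t low = high; low >= sizeDivisor; low -= sizeDivisor)
      for (int64_t highCount = 0; highCount * high <= tripCount; ++highCount) {
        int64_t rem = tripCount - highCount * high;
        if (rem % low == 0)
          return MultiSizeSpec{low, high, rem / low, highCount};
      }
  return std::nullopt;
}

int main() {
  // Cover a trip count of 25 with both sizes <= 10 and divisor 1; the assert
  // only checks the covering identity, not which solution was picked.
  auto spec = findMultiSizeSpec(25, 10, 1);
  assert(spec && spec->lowSize * spec->lowCount +
                         spec->highSize * spec->highCount ==
                     25);
  return 0;
}
```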
@@ -427,11 +425,10 @@
 /// tiling by `numThreads`.
 /// If non-empty, the `threadDimMapping` is added as an attribute to the
 /// resulting `scf.foreach_thread`.
-/// Zero tile sizes indicate that the dimension is not tiled, and can be thought
-/// of as tiling by the full size of data.
-/// It is the user's responsibility to ensure that `numThreads` is a
-/// valid tiling specification (i.e. that only tiles parallel
-/// dimensions, e.g. in the Linalg case).
+/// Zero tile sizes indicate that the dimension is not tiled, and can be
+/// thought of as tiling by the full size of data. It is the user's
+/// responsibility to ensure that `numThreads` is a valid tiling specification
+/// (i.e. that only tiles parallel dimensions, e.g. in the Linalg case).
 struct ForeachThreadTilingResult {
   Operation *tileOp;
   Operation *tiledOp;
@@ -448,10 +445,10 @@
                      ArrayRef<OpFoldResult> tileSizes,
                      ArrayRef<int64_t> threadDimMapping = {});

-/// All indices returned by IndexOp should be invariant with respect to tiling.
-/// Therefore, if an operation is tiled, we have to transform the indices
-/// accordingly, i.e. offset them by the values of the corresponding induction
-/// variables that are captured implicitly in the body of the op.
+/// All indices returned by IndexOp should be invariant with respect to
+/// tiling. Therefore, if an operation is tiled, we have to transform the
+/// indices accordingly, i.e. offset them by the values of the corresponding
+/// induction variables that are captured implicitly in the body of the op.
 ///
 /// Example. `linalg.generic` before tiling:
 ///
@@ -491,8 +488,9 @@
 ///       %transformed_i = arith.addi %i, %k : index // index `i` is offset by
 ///       %k %transformed_j = arith.addi %j, %l : index // index `j` is offset
 ///       by %l
-///       // Every use of %i, %j is replaced with %transformed_i, %transformed_j
-///
+///       // Every use of %i, %j is replaced with %transformed_i,
+///       %transformed_j
 ///    }: memref<?x?xf32>, memref<?x?xf32>
 ///   }
 /// }
@@ -516,8 +514,8 @@
     paddingDimensions.assign(pd.begin(), pd.end());
     return *this;
   }
-  /// A flag for every operand to mark the PadOp as nofold which enables packing
-  /// for statically shaped operands.
+  /// A flag for every operand to mark the PadOp as nofold which enables
+  /// packing for statically shaped operands.
   SmallVector<bool> packPaddings;
   LinalgPaddingOptions &setPackPaddings(ArrayRef<bool> pp) {
     packPaddings.assign(pp.begin(), pp.end());
     return *this;
   }
@@ -529,8 +527,8 @@
     hoistPaddings.assign(hp.begin(), hp.end());
     return *this;
   }
-  /// A permutation vector for every operand used to transpose the packed PadOp
-  /// results.
+  /// A permutation vector for every operand used to transpose the packed
+  /// PadOp results.
   SmallVector<SmallVector<int64_t>> transposePaddings;
   LinalgPaddingOptions &
   setTransposePaddings(ArrayRef<SmallVector<int64_t>> tp) {
@@ -629,20 +627,12 @@
   }
 };

-/// Canonicalization patterns relevant to apply after tiling patterns. These are
-/// applied automatically by the tiling pass but need to be applied manually
-/// when tiling is called programmatically.
+/// Canonicalization patterns relevant to apply after tiling patterns. These
+/// are applied automatically by the tiling pass but need to be applied
+/// manually when tiling is called programmatically.
 RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);
 void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns);

-/// Perform tiling using LinalgTilingOptions.
-/// Note: this is on a path to deprecation that only works on LinalgOp.
-/// Clients should favor using `tileUsingSCFForOp` that more generally works on
-/// TilingInterface.
-FailureOr<TiledLinalgOp>
-tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op,
-                            const LinalgTilingOptions &options);
-
 ///
 /// Linalg padding pattern.
 ///
@@ -713,14 +703,14 @@
 /// Apply the `generalization` transformation as a pattern.
 /// See `generalization` for more details.
 //
-// TODO: Automatic default pattern class that just unwraps a function returning
-// FailureOr<GenericOp>.
+// TODO: Automatic default pattern class that just unwraps a function
+// returning FailureOr<GenericOp>.
 struct LinalgGeneralizationPattern
     : public OpInterfaceRewritePattern<LinalgOp> {
   using OpInterfaceRewritePattern<LinalgOp>::OpInterfaceRewritePattern;

-  /// `matchAndRewrite` implementation that returns the significant transformed
-  /// pieces of IR.
+  /// `matchAndRewrite` implementation that returns the significant
+  /// transformed pieces of IR.
   FailureOr<GenericOp>
   returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const {
     return generalizeNamedOp(rewriter, op);
   }
@@ -765,8 +755,8 @@
 // Op-specific patterns.
 //===----------------------------------------------------------------------===//

-/// tensor::PadOp is not canonicalized away yet, so we provide a transformation
-/// to `linalg.generic`.
+/// tensor::PadOp is not canonicalized away yet, so we provide a
+/// transformation to `linalg.generic`.
 struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
   using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
@@ -774,12 +764,12 @@
                                 PatternRewriter &rewriter) const override;
 };

-/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands to
-/// a static bounding box. Use `paddingValues` and `packPaddings` to set padding
-/// value and nofold attribute of the created tensor::PadOps, respectively.
-/// Update `paddedOp` to the cloned operation with statically shaped
-/// `paddingDimensions` and return the extracted dynamically shaped results.
-/// If padding fails, return failure.
+/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands
+/// to a static bounding box. Use `paddingValues` and `packPaddings` to set
+/// padding value and nofold attribute of the created tensor::PadOps,
+/// respectively. Update `paddedOp` to the cloned operation with statically
+/// shaped `paddingDimensions` and return the extracted dynamically shaped
+/// results. If padding fails, return failure.
 FailureOr<SmallVector<Value>>
 rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
                   ArrayRef<int64_t> paddingDimensions,
@@ -866,7 +856,8 @@
 ///    vector.transfer_write %..., %out[...]
 /// ```
 /// Where there is no interleaved use between transfer_write and memref.copy.
-/// This is a custom rewrite to forward partial writes to vector.transfer_write.
+/// This is a custom rewrite to forward partial writes to
+/// vector.transfer_write.
 struct LinalgCopyVTWForwardingPattern
     : public OpRewritePattern<vector::TransferWriteOp> {
   using OpRewritePattern<vector::TransferWriteOp>::OpRewritePattern;
@@ -903,10 +894,11 @@
 /// Split Reduction options.
 struct SplitReductionOptions {
-  // Ratio used to split the reduction dimension. If the ratio is <= 1, nothing
-  // will be done.
+  // Ratio used to split the reduction dimension. If the ratio is <= 1,
+  // nothing will be done.
   int64_t ratio = 0;
-  // Index where the extra dimension is added to the intermediate tensor shape.
+  // Index where the extra dimension is added to the intermediate tensor
+  // shape.
   unsigned index = 0;
   // If the inner dimension after splitting is parallel or reduction.
   bool innerParallel = false;
@@ -924,11 +916,10 @@
                  const ControlSplitReductionFn &controlSplitReductionFn,
                  bool useAlloc = false);

-/// Apply transformation to split the single linalg op reduction into a parallel
-/// and reduction dimension. Then create a new linalg.generic op doing the rest
-/// of the reduction.
-/// Return the new linalg op with an extra parallel dimension or failure if the
-/// transformation didn't happen.
+/// Apply transformation to split the single linalg op reduction into a
+/// parallel and reduction dimension. Then create a new linalg.generic op
+/// doing the rest of the reduction. Return the new linalg op with an extra
+/// parallel dimension or failure if the transformation didn't happen.
 ///
 /// Example:
 /// ```
@@ -945,10 +936,10 @@
 /// To:
 /// ```
 ///  %cst = arith.constant 0.000000e+00 : f32
-///  %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
-///  %1 = tensor.empty [4] : tensor<4xf32>
-///  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
-///  %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+///  %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into
+///  tensor<4x8xf32> %1 = tensor.empty [4] : tensor<4xf32> %2 = linalg.fill
+///  ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> %3 =
+///  linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
 ///                    affine_map<(d0, d1) -> (d0)>],
 ///    iterator_types = ["parallel", "reduction"]}
 ///    ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
@@ -977,8 +968,8 @@
   bool useAlloc = false);

 /// Scaling-based implementation of the split reduction transformation.
-/// Instead of introducing an ExpandShapeOp, this rewrites a reduction dimension
-/// `k` into `k * scale + kk`.
+/// Instead of introducing an ExpandShapeOp, this rewrites a reduction
+/// dimension `k` into `k * scale + kk`.
 ///
 /// Example:
 /// ```
@@ -1003,7 +994,8 @@
 ///
 ///  %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
 ///    iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-///    ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
+///    ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>,
+///    tensor<64x4xi1>)
 ///    outs(%1 : tensor<16x32x64xf32>) {
 ///    ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
 ///      %5 = arith.mulf %arg3, %arg4 : f32
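With `peelLoop` now exposed from this header (it was previously a static helper, see the Transforms.cpp hunk below), callers can peel individual tiled loops directly. A minimal hedged usage sketch, not code from this patch; it assumes `loop` is an scf.for obtained from a prior tiling step:

```cpp
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

using namespace mlir;

// Hedged sketch: peel the partial final iteration off one loop. For an
// scf.for this returns the loop nest's new results; other loop kinds are
// not yet supported (see the TODO on the declaration).
static void peelTiledLoop(RewriterBase &rewriter, Operation *loop) {
  SmallVector<Value> newResults = linalg::peelLoop(rewriter, loop);
  (void)newResults; // Uses of the old loop results may need updating.
}
```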
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -885,19 +885,37 @@
 transform::ScalarizeOp::applyToOne(linalg::LinalgOp target,
                                    SmallVectorImpl<Operation *> &results,
                                    transform::TransformState &state) {
-  LinalgTilingOptions tilingOptions;
-  tilingOptions.scalarizeDynamicDims();
-  // Tiling with "scalarize_dyn_dims" actually sets the same lambda as the
-  // tile sizes and asserts that it is not already set.
+  scf::SCFTilingOptions tilingOptions;
+  tilingOptions.setTileSizeComputationFunction([&](OpBuilder &b, Operation *) {
+    SmallVector<Value, 4> tileSizes;
+    Location loc = target.getLoc();
+    SmallVector<Value, 4> allShapeSizes =
+        target.createFlatListOfOperandDims(b, loc);
+    AffineMap map = target.getShapesToLoopsMap();
+    if (!map)
+      return tileSizes;
+    IRRewriter rewriter(b);
+    SmallVector<OpFoldResult> shapeSizes =
+        makeComposedFoldedMultiResultAffineApply(rewriter, loc, map,
+                                                 allShapeSizes);
+    // If the shape size is dynamic, tile by 1.
+    // Otherwise, do not tile (i.e. tile size 0).
+    for (OpFoldResult shapeSize : shapeSizes) {
+      tileSizes.push_back(getConstantIntValue(shapeSize)
+                              ? b.create<arith::ConstantIndexOp>(loc, 0)
+                              : b.create<arith::ConstantIndexOp>(loc, 1));
+    }
+    return tileSizes;
+  });
   SmallVector<int64_t> emptyTileSizes;
   SimpleRewriter rewriter(getContext());
   rewriter.setInsertionPoint(target);
-  FailureOr<TiledLinalgOp> result =
-      tileWithLinalgTilingOptions(rewriter, target, tilingOptions);
-  if (failed(result))
+  FailureOr<scf::SCFTilingResult> maybeTilingResult = tileUsingSCFForOp(
+      rewriter, cast<TilingInterface>(target.getOperation()), tilingOptions);
+  if (failed(maybeTilingResult))
     return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
-  results.push_back(result->op);
+  results.push_back(maybeTilingResult->tiledOp);
   return DiagnosedSilenceableFailure(success());
 }

@@ -1127,7 +1145,7 @@
       return diag;
     }

-    LinalgTilingOptions tilingOptions;
+    scf::SCFTilingOptions tilingOptions;
     unsigned index = en.index();
     if (!tileSizes.empty()) {
       tilingOptions.setTileSizeComputationFunction(
@@ -1148,15 +1166,22 @@
           });
     }

-    tilingOptions.setInterchange(extractUIntArray(getInterchange()));
+    tilingOptions.setInterchange(extractI64Array(getInterchange()));
     SimpleRewriter rewriter(linalgOp.getContext());
-    FailureOr<TiledLinalgOp> tiledOp =
-        tileWithLinalgTilingOptions(rewriter, linalgOp, tilingOptions);
-    if (failed(tiledOp))
+    FailureOr<scf::SCFTilingResult> maybeTilingResult = tileUsingSCFForOp(
+        rewriter, cast<TilingInterface>(linalgOp.getOperation()),
+        tilingOptions);
+    if (failed(maybeTilingResult))
       return DiagnosedSilenceableFailure::definiteFailure();

-    tiled.push_back(tiledOp->op);
-    for (const auto &en2 : llvm::enumerate(tiledOp->loops))
+    if (linalgOp.hasBufferSemantics())
+      rewriter.eraseOp(linalgOp);
+    else
+      rewriter.replaceOp(linalgOp,
+                         maybeTilingResult->loops.front()->getResults());
+
+    tiled.push_back(maybeTilingResult->tiledOp);
+    for (const auto &en2 : llvm::enumerate(maybeTilingResult->loops))
       loops[en2.index()].push_back(en2.value());
   }
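Both rewrites above follow the same recipe. For readers migrating off `tileWithLinalgTilingOptions`, here is that recipe distilled into a hedged, self-contained sketch (tile sizes hard-coded, diagnostics trimmed; not code from this patch):

```cpp
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"

using namespace mlir;

// Hedged sketch of the replacement flow: tileUsingSCFForOp tiles the op but,
// unlike the removed tileWithLinalgTilingOptions, leaves the original op in
// place, so the caller must erase or replace it explicitly (as TileOp now
// does above).
static LogicalResult tileWithSCF(RewriterBase &rewriter, TilingInterface op) {
  scf::SCFTilingOptions tilingOptions;
  tilingOptions.setTileSizes({10, 20}); // 0 would mean "do not tile this dim".
  FailureOr<scf::SCFTilingResult> tilingResult =
      scf::tileUsingSCFForOp(rewriter, op, tilingOptions);
  if (failed(tilingResult))
    return failure();
  if (op->getNumResults() == 0)
    rewriter.eraseOp(op); // Buffer semantics: no SSA results to replace.
  else
    rewriter.replaceOp(op, tilingResult->loops.front()->getResults());
  return success();
}
```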
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -64,34 +64,6 @@
   return *this;
 }

-LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
-  assert(!tileSizeComputationFunction && "tile sizes already set");
-  tileSizeComputationFunction = [](OpBuilder &b, Operation *op) {
-    SmallVector<Value, 4> tileSizes;
-    auto linalgOp = dyn_cast<LinalgOp>(op);
-    if (!linalgOp)
-      return tileSizes;
-    Location loc = linalgOp.getLoc();
-    SmallVector<Value, 4> allShapeSizes =
-        linalgOp.createFlatListOfOperandDims(b, loc);
-    AffineMap map = linalgOp.getShapesToLoopsMap();
-    if (!map)
-      return tileSizes;
-    IRRewriter rewriter(b);
-    SmallVector<OpFoldResult> shapeSizes =
-        makeComposedFoldedMultiResultAffineApply(rewriter, loc, map,
-                                                 allShapeSizes);
-    // If the shape size is dynamic, tile by 1. Otherwise, do not tile (tile
-    // size 0).
-    for (OpFoldResult shapeSize : shapeSizes)
-      tileSizes.push_back(getConstantIntValue(shapeSize)
-                              ? b.create<arith::ConstantIndexOp>(loc, 0)
-                              : b.create<arith::ConstantIndexOp>(loc, 1));
-    return tileSizes;
-  };
-  return *this;
-}
-
 /// Pad the `opOperand` in the `paddingDimensions` using the padding value and
 /// the nofold flag found in `paddingValues` and `packPaddings`, respectively.
 /// Exit early and return the `opOperand` value if the shape dimensions that
@@ -246,7 +218,8 @@
 /// Try to peel a loop `op` and return the new result.
 // TODO: Add support for scf.parallel and affine.for loops.
-static SmallVector<Value> peelLoop(RewriterBase &rewriter, Operation *op) {
+SmallVector<Value> mlir::linalg::peelLoop(RewriterBase &rewriter,
+                                          Operation *op) {
   return llvm::TypeSwitch<Operation *, SmallVector<Value>>(op)
       .Case([&](scf::ForOp forOp) {
         scf::ForOp partialIteration;
@@ -262,47 +235,8 @@
 /// Peel and canonicalize 'loops'.
 void mlir::linalg::peelLoops(RewriterBase &rewriter,
                              ArrayRef<scf::ForOp> loops) {
-  for (auto loopOp : loops) {
-    SmallVector<Value> loopResults;
-    loopResults = peelLoop(rewriter, loopOp);
-  }
-}
-
-/// Peel loops after tiling.
-void mlir::linalg::peelTiledLinalgOp(RewriterBase &rewriter, TiledLinalgOp &res,
-                                     ArrayRef<int64_t> peeledLoops,
-                                     LinalgTilingLoopType loopType) {
-  for (int64_t loop : peeledLoops) {
-    assert(loop < static_cast<int64_t>(res.loops.size()) &&
-           "requested peeling of non-existing loop");
-    SmallVector<Value> loopResults;
-    Operation *loopOp = res.loops[loop];
-    loopResults = peelLoop(rewriter, loopOp);
-
-    // The result of the loop nest may change with peeling.
-    if (res.tensorResults.size() == loopOp->getNumResults() &&
-        std::equal(res.tensorResults.begin(), res.tensorResults.end(),
-                   loopOp->getResults().begin()))
-      res.tensorResults = loopResults;
-  }
-}
-
-FailureOr<TiledLinalgOp>
-mlir::linalg::tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op,
-                                          const LinalgTilingOptions &options) {
-  FailureOr<TiledLinalgOp> res = tileLinalgOp(rewriter, op, options);
-  if (failed(res))
-    return failure();
-
-  // Peel the loops of the TiledLinalgOp.
-  peelTiledLinalgOp(rewriter, *res, options.peeledLoops, options.loopType);
-
-  if (res->tensorResults.empty())
-    rewriter.eraseOp(op);
-  else
-    rewriter.replaceOp(op, res->tensorResults);
-
-  return res;
+  for (auto loopOp : loops)
+    peelLoop(rewriter, loopOp);
 }

 /// Linalg padding pattern.
diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -126,8 +126,8 @@
   bindSymbols(b.getContext(), s0, s1);
   AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, b.getContext());
   Value size = getValueOrCreateConstantIndexOp(b, loc, loopRange.size);
-  return b.create<AffineMinOp>(loc, minMap, ValueRange{iv, tileSize, size})
-      .getResult();
+  return makeComposedFoldedAffineMin(
+      b, loc, minMap, SmallVector<OpFoldResult>{iv, tileSize, size});
 }

 /// Generate an empty loop nest that represents the tiled loop nest shell.
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --test-transform-dialect-interpreter --canonicalize %s | FileCheck %s
+// RUN: mlir-opt --test-transform-dialect-interpreter --scf-for-loop-canonicalization --canonicalize %s | FileCheck %s

 // This implements a 2D multisize tiling with target sizes [3, 10].
 transform.sequence failures(propagate) {
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -15,25 +15,28 @@
 // CHECK: scf.for
 // CHECK: scf.for
 // CHECK: scf.for
-// CHECK: %[[T7:.+]] = memref.subview %[[ARG0]]
-// CHECK: %[[T12:.+]] = memref.subview %[[ARG1]]
-// CHECK: %[[T17:.+]] = memref.subview %[[ARG2]]
-// CHECK: %[[A0:.*]] = memref.alloc() : memref<1024xi8>
-// CHECK: %[[V0:.*]] = memref.view %[[A0]][%[[C0]]][] : memref<1024xi8> to memref<16x16xf32>
-// CHECK: %[[T19:.+]] = memref.subview %[[V0]]
-// CHECK: %[[A1:.*]] = memref.alloc() : memref<1024xi8>
-// CHECK: %[[V1:.*]] = memref.view %[[A1]][%[[C0]]][] : memref<1024xi8> to memref<16x16xf32>
-// CHECK: %[[T21:.+]] = memref.subview %[[V1]]
-// CHECK: memref.copy %[[T7]], %[[T19]]
-// CHECK: memref.copy %[[T17]], %[[T21]]
-// CHECK: linalg.matmul ins(%[[T19]], %[[T12]]{{.*}} outs(%[[T21]]
-// CHECK: memref.copy %[[T21]], %[[T17]]
-// CHECK: memref.dealloc %[[A0]]
-// CHECK: memref.dealloc %[[A1]]
+// CHECK: %[[svA:.+]] = memref.subview %[[ARG0]]
+// CHECK: %[[svB:.+]] = memref.subview %[[ARG1]]
+// CHECK: %[[svC:.+]] = memref.subview %[[ARG2]]
+
+// CHECK: %[[tmpA:.*]] = memref.alloc() : memref<1024xi8>
+// CHECK: %[[VA:.*]] = memref.view %[[tmpA]][%[[C0]]][] : memref<1024xi8> to memref<16x16xf32>
+// CHECK: %[[svAA:.+]] = memref.subview %[[VA]]
+
+// CHECK: %[[tmpC:.*]] = memref.alloc() : memref<1024xi8>
+// CHECK: %[[VC:.*]] = memref.view %[[tmpC]][%[[C0]]][] : memref<1024xi8> to memref<16x16xf32>
+// CHECK: %[[svCC:.+]] = memref.subview %[[VC]]
+
+// CHECK: memref.copy %[[svA]], %[[svAA]]
+// CHECK: memref.copy %[[svC]], %[[svCC]]
+// CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]]
+// CHECK: memref.copy %[[svCC]], %[[svC]]
+// CHECK: memref.dealloc %[[tmpA]]
+// CHECK: memref.dealloc %[[tmpC]]

 transform.sequence failures(propagate) {
 ^bb0(%arg1: !pdl.operation):
   %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
   %1, %loops:3 = transform.structured.tile %0 [16, 16, 16]
-  %2 = transform.structured.promote %1 { operands_to_promote = [0, 2], force_full_tiles = [false, false] }
+  %2 = transform.structured.promote %1 { operands_to_promote = [0, 2], force_full_tiles = [false, false], use_full_tiles_by_default }
 }
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,9 +1,8 @@
 // RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize | FileCheck %s

-// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)>
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 + s0 - 1)>

 func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref<?x?xf32>) {
   linalg.conv_2d ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>) outs(%arg2 : memref<?x?xf32>)
   return
 }
@@ -24,18 +23,19 @@
 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
-// CHECK-DAG: %[[T0:.*]] = memref.dim %[[ARG1]], %[[C0]]
-// CHECK-DAG: %[[T1:.*]] = memref.dim %[[ARG1]], %[[C1]]
-// CHECK-DAG: %[[T2:.*]] = memref.dim %[[ARG2]], %[[C0]]
-// CHECK-DAG: %[[T3:.*]] = memref.dim %[[ARG2]], %[[C1]]
-// CHECK: scf.for %[[ARG3:.*]] = %[[C0]] to %[[T2]] step %[[C2]]
-// CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]]
-// CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]], %[[T0]]]
-// CHECK: %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T3]], %[[T1]]]
-// CHECK: %[[T6:.*]] = affine.min #[[MAP2]](%[[ARG3]])[%[[T2]]
-// CHECK: %[[T7:.*]] = affine.min #[[MAP3]](%[[ARG4]])[%[[T3]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] [%[[T6]], %[[T7]]]
+// CHECK-DAG: %[[KH:.*]] = memref.dim %[[ARG1]], %[[C0]]
+// CHECK-DAG: %[[KW:.*]] = memref.dim %[[ARG1]], %[[C1]]
+// CHECK-DAG: %[[H:.*]] = memref.dim %[[ARG2]], %[[C0]]
+// CHECK-DAG: %[[W:.*]] = memref.dim %[[ARG2]], %[[C1]]
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[H]] step %[[C2]]
+// CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[I]])[%[[H]]]
+// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[W]] step %[[C3]]
+// CHECK-DAG: %[[T5:.*]] = affine.min #[[MAP1]](%[[J]])[%[[W]]]
+// CHECK-DAG: %[[T6:.*]] = affine.apply #[[MAP2]](%[[T4]])[%[[KH]]]
+// CHECK-DAG: %[[T7:.*]] = affine.apply #[[MAP2]](%[[T5]])[%[[KW]]]
+// CHECK-DAG: %[[SVIN:.*]] = memref.subview %[[ARG0]][%[[I]], %[[J]]] [%[[T6]], %[[T7]]]
+// CHECK-DAG: %[[SVKER:.*]] = memref.subview %[[ARG1]][0, 0] [%[[KH]], %[[KW]]]
+// CHECK-DAG: %[[SVOUT:.*]] = memref.subview %[[ARG2]][%[[I]], %[[J]]] [%[[T4]], %[[T5]]]
 // CHECK: linalg.conv_2d
-// CHECK-SAME: ins(%[[SV1]], %[[ARG1]]
-// CHECK-SAME: outs(%[[SV2]]
+// CHECK-SAME: ins(%[[SVIN]], %[[SVKER]]
+// CHECK-SAME: outs(%[[SVOUT]]
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -14,7 +14,7 @@
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !pdl.operation):
   %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
-  %1, %loop:2 = transform.structured.tile %0 [10, 25]
+  %1, %loop = transform.structured.tile %0 [10]
 }

 // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -88,7 +88,7 @@

 // -----

-// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>

 // CHECK: fold_extract_slice
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
@@ -106,10 +106,10 @@
 // CHECK: %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x?xf32> to tensor<?x42xf32>
 // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
+// CHECK: %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
 // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =

 // Fold the existing extract slice op into the one created by the tiling.
-// CHECK: %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[E]]
 // CHECK-SAME: %[[IV0]], %[[IV1]]
 // CHECK-SAME: %[[SIZE0]], 3
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file | FileCheck %s
+// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file -canonicalize | FileCheck %s

 // CHECK-LABEL: func.func @fuse_unary
 func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
@@ -61,15 +61,12 @@
 // CHECK-DAG: %[[INIT:.+]] = tensor.empty()
 // CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
 // CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
 // CHECK: %[[RES:.*]] = scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 // CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
 // CHECK: %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
-//
-// Extra 4 constant is introduced, discard it.
-// CHECK: arith.constant 4 : index
-// CHECK: %[[C4:.+]] = arith.constant 4 : index
 // CHECK: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 // CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK: %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -364,7 +364,7 @@
   return %2 : tensor<?x?xf32>
 }
-// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
+// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
 // CHECK: func @matmul_sequence_fusion(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
@@ -384,7 +384,7 @@
 // CHECK-DAG: %[[N3:.+]] = tensor.dim %[[ARG5]], %[[C1]]
 // CHECK: %[[R0:.+]] = scf.for %[[IV:[a-zA-Z0-9_]+]] =
 // CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ARG6]]) -> (tensor<?x?xf32>) {
-// CHECK-DAG: %[[TILE_M:.+]] = affine.min #[[MAP]](%[[IV]])[%{{.+}}, %[[M]]]
+// CHECK-DAG: %[[TILE_M:.+]] = affine.min #[[MAP]](%[[IV]])[%[[M]]]
 // CHECK-DAG: %[[SLICE_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV]], 0] [%[[TILE_M]], %[[N0]]]
 // CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
 // CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -7,9 +7,9 @@
     outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
-// CHECK: func.func @simple_matmul(
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
+// CHECK-LABEL: func.func @simple_matmul(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
@@ -22,10 +22,10 @@
 // CHECK-DAG: %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]]
 // CHECK: %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
 // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[ARG2]])
-// CHECK: %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[M]]]
+// CHECK: %[[TS_Y:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[M]]]
 // CHECK: %[[INNER:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
 // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]])
-// CHECK: %[[TS_X:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[N]]]
+// CHECK: %[[TS_X:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[N]]]
 // CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]]
 // CHECK-SAME: [%[[IV0]], 0] [%[[TS_Y]], %[[K]]] [1, 1]
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]]
@@ -50,10 +50,10 @@
     outs(%arg2 : memref<?x?xf32>)
   return
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
-// CHECK: func.func @simple_matmul_memref(
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
+// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)>
+// CHECK-LABEL: func.func @simple_matmul_memref(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: memref<?x?xf32>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: memref<?x?xf32>
@@ -66,11 +66,11 @@
 // CHECK-DAG: %[[K:.+]] = memref.dim %[[ARG0]], %[[C1]]
 // CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG1]], %[[C1]]
 // CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
-// CHECK: %[[TS_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[M]]]
+// CHECK: %[[TS_M:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[M]]]
 // CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
-// CHECK: %[[TS_N:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[N]]]
+// CHECK: %[[TS_N:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[N]]]
 // CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]]
-// CHECK: %[[TS_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C30]], %[[K]]]
+// CHECK: %[[TS_K:.+]] = affine.min #[[$MAP2]](%[[IV2]])[%[[K]]]
 // CHECK-DAG: %[[LHS_TILE:.+]] = memref.subview %[[ARG0]]
 // CHECK-SAME: [%[[IV0]], %[[IV2]]] [%[[TS_M]], %[[TS_K]]] [1, 1]
 // CHECK-DAG: %[[RHS_TILE:.+]] = memref.subview %[[ARG1]]
@@ -100,8 +100,8 @@
   } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
   return %0#0, %0#1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK: func.func @multi_result(
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (10, -d0 + 128)>
+// CHECK-LABEL: func.func @multi_result(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x200x300xf32>)
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index
@@ -112,7 +112,7 @@
 // CHECK-DAG: %[[INIT1:.+]] = tensor.empty()
 // CHECK: %[[OUTER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[C128]] step %[[C10]]
 // CHECK-SAME: iter_args(%[[ARG1:[a-zA-Z0-9]+]] = %[[INIT0]], %[[ARG2:[a-zA-Z0-9]+]] = %[[INIT1]])
-// CHECK: %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[C128]]]
+// CHECK: %[[TS_Y:.+]] = affine.min #[[$MAP0]](%[[IV0]])
 // CHECK: %[[INNER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[C300]] step %[[C20]]
 // CHECK-SAME: iter_args(%[[ARG3:[a-zA-Z0-9]+]] = %[[ARG1]], %[[ARG4:[a-zA-Z0-9]+]] = %[[ARG2]])
 // CHECK-DAG: %[[ARG_TILE:.+]] = tensor.extract_slice %[[ARG0]]
@@ -144,12 +144,12 @@
     outs(%arg2 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
   return %0 : tensor<?x?x?x?xf32>
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 2 - 2)>
-// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 3 - 3)>
-// CHECK: func.func @conv2D(
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
+// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)>
+// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 2 - 2)>
+// CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 3 - 3)>
+// CHECK-LABEL: func.func @conv2D(
 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
 // CHECK-SAME: %[[FILTER:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
 // CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
@@ -169,15 +169,15 @@
 // CHECK-DAG: %[[S:.+]] = tensor.dim %[[INIT]], %[[C2]]
 // CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[P]] step %[[C10]]
 // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[INIT]])
-// CHECK: %[[TS_P:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[P]]]
+// CHECK: %[[TS_P:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[P]]]
 // CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[Q]] step %[[C20]]
 // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]])
-// CHECK: %[[TS_Q:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[Q]]]
+// CHECK: %[[TS_Q:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[Q]]]
 // CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[C]] step %[[C30]]
 // CHECK-SAME: iter_args(%[[INIT2:.+]] = %[[INIT1]])
-// CHECK-DAG: %[[TS_C:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C30]], %[[C]]]
-// CHECK-DAG: %[[TS_H:.+]] = affine.apply #[[MAP3]](%[[TS_P]])[%[[R]]]
-// CHECK-DAG: %[[TS_W:.+]] = affine.apply #[[MAP4]](%[[TS_Q]])[%[[S]]]
+// CHECK-DAG: %[[TS_C:.+]] = affine.min #[[$MAP2]](%[[IV2]])[%[[C]]]
+// CHECK-DAG: %[[TS_H:.+]] = affine.apply #[[$MAP3]](%[[TS_P]])[%[[R]]]
+// CHECK-DAG: %[[TS_W:.+]] = affine.apply #[[$MAP4]](%[[TS_Q]])[%[[S]]]
 // CHECK-DAG: %[[INPUT_TILE:.+]] = tensor.extract_slice %[[INPUT]]
 // CHECK-SAME: [0, %[[IV0]], %[[IV1]], %[[IV2]]] [%[[N]], %[[TS_H]], %[[TS_W]], %[[TS_C]]]
 // CHECK-DAG: %[[FILTER_TILE:.+]] = tensor.extract_slice %[[FILTER]]
@@ -234,10 +234,10 @@
     outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK: func.func @interchange_matmul(
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (30, -d0 + s0)>
+// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
+// CHECK-LABEL: func.func @interchange_matmul(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
@@ -251,13 +251,13 @@
 // CHECK-DAG: %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]]
 // CHECK: %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
 // CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[ARG2]])
-// CHECK: %[[TS_N:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C20]], %[[N]]]
+// CHECK: %[[TS_N:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[N]]]
 // CHECK: %[[INNER1:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]]
 // CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]])
-// CHECK: %[[TS_K:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C30]], %[[K]]]
+// CHECK: %[[TS_K:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[K]]]
 // CHECK: %[[INNER2:[a-zA-Z0-9]+]] = scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
 // CHECK-SAME: iter_args(%[[INIT2:.+]] = %[[INIT1]])
-// CHECK-DAG: %[[TS_M:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C10]], %[[M]]]
+// CHECK-DAG: %[[TS_M:.+]] = affine.min #[[$MAP2]](%[[IV2]])[%[[M]]]
 // CHECK-DAG: %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]]
 // CHECK-SAME: [%[[IV2]], %[[IV1]]] [%[[TS_M]], %[[TS_K]]] [1, 1]
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]]
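A closing note on the FileCheck churn in the test updates: every `affine_map` change falls out of the one-line rewrite in TileUsingInterface.cpp. `makeComposedFoldedAffineMin` composes constant operands into the map before materializing any op, so the constant tile-size symbol disappears from the map (hence `(d0)[s0, s1] -> (10, -d0 + s1)` over `(%iv)[%c10, %m]` becoming `(d0)[s0] -> (10, -d0 + s0)` over `(%iv)[%m]`, and a purely constant map for the static `@multi_result` case). Annotated before/after of that hunk, with commentary added here (the helper is assumed to return an `OpFoldResult` after the change):

```cpp
// Before: always materialize an affine.min op, keeping the constant tile
// size as an extra symbol operand (%c10), which the old CHECK lines matched.
Value tileSizeBefore =
    b.create<AffineMinOp>(loc, minMap, ValueRange{iv, tileSize, size})
        .getResult();

// After: constants are folded into the composed map first; when everything
// is static the result comes back as a plain Attribute and no op is created.
OpFoldResult tileSizeAfter = makeComposedFoldedAffineMin(
    b, loc, minMap, SmallVector<OpFoldResult>{iv, tileSize, size});
```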