diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -38,11 +38,6 @@
 std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
 
-std::unique_ptr<OperationPass<func::FuncOp>>
-createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {},
-                       linalg::LinalgTilingLoopType loopType =
-                           linalg::LinalgTilingLoopType::Loops);
-
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgInlineScalarOperandsPass();
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -102,22 +102,6 @@
   ];
 }
 
-def LinalgTilingPass : Pass<"linalg-tile", "func::FuncOp"> {
-  let summary = "Tile operations in the linalg dialect";
-  let constructor = "mlir::createLinalgTilingPass()";
-  let dependentDialects = [
-    "AffineDialect",
-    "linalg::LinalgDialect",
-    "memref::MemRefDialect",
-    "scf::SCFDialect"
-  ];
-  let options = [
-    ListOption<"tileSizes", "tile-sizes", "int64_t", "Tile sizes">,
-    Option<"loopType", "loop-type", "std::string", /*default=*/"\"for\"",
-           "Specify the type of loops to generate: for, parallel">
-  ];
-}
-
 def LinalgGeneralization : Pass<"linalg-generalize-named-ops", "func::FuncOp"> {
   let summary = "Convert named ops into generic ops";
   let constructor = "mlir::createLinalgGeneralizationPass()";
@@ -162,19 +146,6 @@
   ];
 }
 
-def LinalgStrategyTilePass
-    : Pass<"linalg-strategy-tile-pass", "func::FuncOp"> {
-  let summary = "Configurable pass to apply pattern-based linalg tiling.";
-  let constructor = "mlir::createLinalgStrategyTilePass()";
-  let dependentDialects = ["linalg::LinalgDialect"];
-  let options = [
-    Option<"anchorFuncName", "anchor-func", "std::string", /*default=*/"",
-           "Which func op is the anchor to latch on.">,
-    Option<"anchorOpName", "anchor-op", "std::string", /*default=*/"",
-           "Which linalg op within the func is the anchor to latch on.">,
-  ];
-}
-
 def LinalgStrategyRemoveMarkersPass
     : Pass<"linalg-strategy-remove-markers-pass", "func::FuncOp"> {
   let summary = "Cleanup pass that drops markers.";
 
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -732,6 +732,63 @@
   }];
 }
 
+def TileToScfForOp : Op<Transform_Dialect, "structured.tile_to_scf_for",
+       [DeclareOpInterfaceMethods<TransformOpInterface>,
+        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+  let description = [{
+    Indicates that the given `target` op should be tiled with the given sizes.
+    This transform generates a loop nest with a smaller ("tiled") target
+    operation in its body. The target must implement TilingInterface.
+
+    Tile sizes may be known at transformation time, in which case they are
+    expected to be provided in the `static_sizes` attribute, or not, in which
+    case the tile value must be computed by the payload IR and the handle to
+    the operation computing it must be provided through `dynamic_sizes`. When
+    the sizes are not known statically, the corresponding entry in the
+    `static_sizes` attribute must be set to `ShapedType::kDynamicSize`. Only
+    the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should
+    be as many handles as `ShapedType::kDynamicSize` values in the
+    `static_sizes` attribute. A static size of `0` indicates that the
+    dimension should not be tiled. No loop will be generated for such
+    dimensions.
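+
+    For example, assuming `%0` is a handle to a payload op implementing
+    TilingInterface and `%sz` is a handle to a payload op producing a single
+    `index` result (both names are illustrative; this is a sketch rather than
+    an excerpt from a test):
+
+    ```
+    %tiled, %loops:2 = transform.structured.tile_to_scf_for %0 [4, %sz, 0]
+    ```
+
+    tiles the first dimension by a static size of `4`, the second by the
+    dynamically computed size, and leaves the third untiled, so two loop
+    handles are produced.
+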
+    If all tile sizes are `0`, this transform is effectively a no-op.
+
+    This op returns handles to the tiled op (in the generated loop nest) and
+    the generated loops. The number of loops is the number of tile sizes that
+    are statically known to be non-zero.
+
+    #### Return modes
+
+    On success, the resulting handles are associated with co-indexed lists of
+    tiled operations and loops around them.
+
+    This operation only supports TilingInterface ops and produces a
+    silenceable failure if the input contains any non-TilingInterface ops.
+    The ops preceding it in the list associated with the `target` handle will
+    have been tiled.
+
+    This operation produces a silenceable failure if the `dynamic_sizes`
+    handles are associated with lists of payload operations of a size
+    different than that of the list associated with the `target` handle.
+
+    If the internal implementation of tiling for any of the operations fails,
+    this transform produces a definite failure.
+  }];
+
+  let arguments = (ins PDL_Operation:$target,
+                   Variadic<PDL_Operation>:$dynamic_sizes,
+                   DefaultValuedAttr<I64ArrayAttr, "{}">:$static_sizes,
+                   DefaultValuedAttr<I64ArrayAttr, "{}">:$interchange);
+  let results = (outs PDL_Operation:$tiled_linalg_op,
+                  Variadic<PDL_Operation>:$loops);
+
+  let hasCustomAssemblyFormat = 1;
+
+  let extraClassDeclaration = [{
+    /// Returns the list of tile sizes, which may be static (Attribute) or
+    /// dynamic (Value).
+    SmallVector<OpFoldResult> getMixedSizes();
+  }];
+}
+
 def VectorizeOp : Op<Transform_Dialect, "structured.vectorize",
     [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
      TransformEachOpTrait, TransformOpInterface]> {
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
@@ -30,41 +30,8 @@
   LinalgTransformationFilter::FilterFunction filter = nullptr;
 };
 
-/// Represent one application of LinalgStrategyTilePass.
-struct Tile : public Transformation {
-  Tile(StringRef name, linalg::LinalgTilingOptions options,
-       LinalgTransformationFilter::FilterFunction f = nullptr)
-      : Transformation(std::move(f)), opName(name),
-        options(std::move(options)) {}
-
-  void addToPassPipeline(OpPassManager &pm,
-                         LinalgTransformationFilter m) const override {
-    pm.addPass(createLinalgStrategyTilePass(opName, options, m));
-  }
-
-private:
-  std::string opName;
-  linalg::LinalgTilingOptions options;
-};
-
 /// Codegen strategy controls how a Linalg op is progressively lowered.
 struct CodegenStrategy {
-  /// Append a pattern to add a level of tiling for Op `opName` with tiling
-  /// `options`.
-  CodegenStrategy &
-  tile(StringRef opName, const linalg::LinalgTilingOptions &options,
-       const LinalgTransformationFilter::FilterFunction &f = nullptr) {
-    transformationSequence.emplace_back(
-        std::make_unique<Tile>(opName, options, f));
-    return *this;
-  }
-  /// Conditionally append a pattern to add a level of tiling for
-  /// `LinalgOpType` with tiling `options`.
-  CodegenStrategy &
-  tileIf(bool b, StringRef opName, linalg::LinalgTilingOptions options,
-         LinalgTransformationFilter::FilterFunction f = nullptr) {
-    return b ? tile(opName, std::move(options), std::move(f)) : *this;
-  }
   /// Configure the post staged-patterns global enabling passes options.
CodegenStrategy & setVectorTransferToSCFOptions(LinalgEnablingOptions options) { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -684,57 +684,25 @@ RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns); -/// -/// Linalg tiling pattern. -/// -/// Apply the `tiling` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `tiling` for more details. -// TODO: TiledOpInterface -struct LinalgTilingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. - LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// Construct a pattern specifically applied to `opName`. - LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. - FailureOr - returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; - - LogicalResult matchAndRewrite(LinalgOp op, - PatternRewriter &rewriter) const override { - return returningMatchAndRewrite(op, rewriter); - } - -private: - /// LinalgTransformMarker handles special attribute manipulations. - LinalgTransformationFilter filter; - /// Options to control tiling; - LinalgTilingOptions options; -}; +/// Perform tile and X. +FailureOr tileAndX(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options); /// /// Linalg padding pattern. /// /// Apply the `padding` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `padding` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `padding` for more details. struct LinalgPaddingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. + /// Construct a generic pattern applied to all LinalgOp that verify + /// `filter`. LinalgPaddingPattern(MLIRContext *context, LinalgPaddingOptions options = LinalgPaddingOptions(), PatternBenefit benefit = 1); - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. + /// `matchAndRewrite` implementation that returns the significant + /// transformed pieces of IR. FailureOr returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; @@ -791,11 +759,12 @@ /// Linalg generalization pattern. /// /// Apply the `generalization` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `generalization` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `generalization` for more details. struct LinalgGeneralizationPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. + /// Construct a generic pattern applied to all LinalgOp that verify + /// `filter`. 
LinalgGeneralizationPattern( MLIRContext *context, LinalgTransformationFilter f = LinalgTransformationFilter(), @@ -807,8 +776,8 @@ LinalgTransformationFilter f = LinalgTransformationFilter(), PatternBenefit benefit = 1); - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. + /// `matchAndRewrite` implementation that returns the significant + /// transformed pieces of IR. FailureOr returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; @@ -828,8 +797,8 @@ /// Empty for now, used for SFINAE purposes only. struct LinalgVectorizationOptions {}; -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `vectorizeLinalgOp` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `vectorizeLinalgOp` for more details. struct CopyVectorizationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -875,8 +844,8 @@ /// Linalg lowering patterns. /// /// Apply the `linalgLowerOpToLoops` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `linalgLowerOpToLoops` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `linalgLowerOpToLoops` for more details. enum class LinalgLoweringType { LibraryCall = 0, Loops = 1, @@ -927,8 +896,8 @@ private: /// LinalgTransformMarker handles special attribute manipulations. LinalgTransformationFilter filter; - /// Controls whether the pattern lowers to library calls, scf.for, affine.for - /// or scf.parallel. + /// Controls whether the pattern lowers to library calls, scf.for, + /// affine.for or scf.parallel. LinalgLoweringType loweringType; }; @@ -942,9 +911,9 @@ /// Linalg decompose convolutions patterns -/// Populates patterns to decompose high-D convolution ops into low-D ones. This -/// is a step in progressive lowering for convolution ops, afterwards we can -/// vectorize the low-D convolution ops. +/// Populates patterns to decompose high-D convolution ops into low-D ones. +/// This is a step in progressive lowering for convolution ops, afterwards we +/// can vectorize the low-D convolution ops. void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); @@ -952,8 +921,8 @@ // Op-specific patterns. //===----------------------------------------------------------------------===// -/// tensor::PadOp is not canonicalized away yet, so we provide a transformation -/// to `linalg.generic`. +/// tensor::PadOp is not canonicalized away yet, so we provide a +/// transformation to `linalg.generic`. struct PadOpTransformationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -961,12 +930,12 @@ PatternRewriter &rewriter) const override; }; -/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands to -/// a static bounding box. Use `paddingValues` and `packPaddings` to set padding -/// value and nofold attribute of the created tensor::PadOps, respectively. -/// Update `paddedOp` to the cloned operation with statically shaped -/// `paddingDimensions` and return the extracted dynamically shaped results. If -/// padding fails, return failure. +/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands +/// to a static bounding box. Use `paddingValues` and `packPaddings` to set +/// padding value and nofold attribute of the created tensor::PadOps, +/// respectively. 
Update `paddedOp` to the cloned operation with statically +/// shaped `paddingDimensions` and return the extracted dynamically shaped +/// results. If padding fails, return failure. FailureOr> rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, ArrayRef paddingDimensions, @@ -1053,7 +1022,8 @@ /// vector.transfer_write %..., %out[...] /// ``` /// Where there is no interleaved use between transfer_write and memref.copy. -/// This is a custom rewrite to forward partial writes to vector.transfer_write. +/// This is a custom rewrite to forward partial writes to +/// vector.transfer_write. struct LinalgCopyVTWForwardingPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -1120,35 +1090,13 @@ const LinalgTransformationFilter &f) {} }; -template -class TilingPatterns; - -template <> -class TilingPatterns<> { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) {} -}; - -template -class TilingPatterns { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) { - patterns.add(OpTy::getOperationName(), - patterns.getContext(), options, f); - TilingPatterns::insert(patterns, options, f); - } -}; - /// Split Reduction options. struct SplitReductionOptions { - // Ratio used to split the reduction dimension. If the ratio is <= 1, nothing - // will be done. + // Ratio used to split the reduction dimension. If the ratio is <= 1, + // nothing will be done. int64_t ratio = 0; - // Index where the extra dimension is added to the intermediate tensor shape. + // Index where the extra dimension is added to the intermediate tensor + // shape. unsigned index = 0; // If the inner dimension after splitting is parallel or reduction. bool innerParallel = false; @@ -1167,10 +1115,10 @@ const LinalgTransformationFilter &f = LinalgTransformationFilter(), bool useAlloc = false); -/// Apply transformation to split the single linalg op reduction into a parallel -/// and reduction dimension. Then create a new linalg.generic op doing the rest -/// of the reduction. Return the new linalg op with an extra parallel dimension -/// or failure if the transformation didn't happen. +/// Apply transformation to split the single linalg op reduction into a +/// parallel and reduction dimension. Then create a new linalg.generic op +/// doing the rest of the reduction. Return the new linalg op with an extra +/// parallel dimension or failure if the transformation didn't happen. /// Example: /// ``` /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, @@ -1186,10 +1134,10 @@ /// To: /// ``` /// %cst = arith.constant 0.000000e+00 : f32 -/// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32> -/// %1 = tensor.empty [4] : tensor<4xf32> -/// %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> -/// %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, +/// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into +/// tensor<4x8xf32> %1 = tensor.empty [4] : tensor<4xf32> %2 = linalg.fill +/// ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = +/// linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, /// affine_map<(d0, d1) -> (d0)>], /// iterator_types = ["parallel", "reduction"]} /// ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) { @@ -1226,8 +1174,8 @@ bool useAlloc = false); /// Scaling-based implementation of the split reduction transformation. 
-/// Instead of introducing an ExpandShapeOp, this rewrites a reduction
-/// dimension `k` into `k * scale + kk`.
+/// Instead of introducing an ExpandShapeOp, this rewrites a reduction
+/// dimension `k` into `k * scale + kk`.
 ///
 /// Example:
 /// ```
@@ -1252,8 +1200,8 @@
 ///
 /// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
 ///   iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-///   ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-///   outs(%1 : tensor<16x32x64xf32>) {
+///   ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>,
+///   tensor<64x4xi1>) outs(%1 : tensor<16x32x64xf32>) {
 /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
 ///   %5 = arith.mulf %arg3, %arg4 : f32
 ///   %6 = arith.addf %arg6, %5 : f32
 
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h"
 #include "mlir/AsmParser/AsmParser.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -22,6 +23,8 @@
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/StringSet.h"
 
+#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
+
 using namespace mlir;
 using namespace mlir::linalg;
 using namespace mlir::transform;
@@ -36,6 +39,16 @@
   return result;
 }
 
+/// Extracts a vector of int64_t from an array attribute. Asserts if the
+/// attribute contains values other than integers.
+static SmallVector<int64_t> extractI64Array(ArrayAttr attr) {
+  SmallVector<int64_t> result;
+  result.reserve(attr.size());
+  for (APInt value : attr.getAsValueRange<IntegerAttr>())
+    result.push_back(value.getSExtValue());
+  return result;
+}
+
 namespace {
 /// A simple pattern rewriter that implements no special logic.
 class SimpleRewriter : public PatternRewriter {
@@ -721,11 +734,9 @@
   // Tiling with "scalarize_dyn_dims" actually sets the same lambda as the
   // tile sizes and asserts that it is not already set.
   SmallVector<Value> emptyTileSizes;
-  LinalgTilingPattern pattern(getContext(), tilingOptions);
   SimpleRewriter rewriter(getContext());
   rewriter.setInsertionPoint(target);
-  FailureOr<TiledLinalgOp> result =
-      pattern.returningMatchAndRewrite(target, rewriter);
+  FailureOr<TiledLinalgOp> result = tileAndX(rewriter, target, tilingOptions);
   if (failed(result))
     return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
 
@@ -915,7 +926,6 @@
 DiagnosedSilenceableFailure
 transform::TileOp::apply(TransformResults &transformResults,
                          TransformState &state) {
-  LinalgTilingOptions tilingOptions;
   SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
 
   ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
@@ -960,6 +970,7 @@
       return diag;
     }
 
+    LinalgTilingOptions tilingOptions;
     unsigned index = en.index();
     if (!tileSizes.empty()) {
       tilingOptions.setTileSizeComputationFunction(
@@ -981,10 +992,9 @@
     }
 
     tilingOptions.setInterchange(extractUIntArray(getInterchange()));
-    LinalgTilingPattern pattern(getContext(), tilingOptions);
     SimpleRewriter rewriter(linalgOp.getContext());
     FailureOr<TiledLinalgOp> tiledOp =
-        pattern.returningMatchAndRewrite(linalgOp, rewriter);
+        tileAndX(rewriter, linalgOp, tilingOptions);
     if (failed(tiledOp))
       return DiagnosedSilenceableFailure::definiteFailure();
 
@@ -1203,6 +1213,153 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// TileToScfForOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure
+transform::TileToScfForOp::apply(TransformResults &transformResults,
+                                 TransformState &state) {
+  SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+
+  ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
+  SmallVector<ArrayRef<Operation *>> dynamicSizeProducers;
+  dynamicSizeProducers.reserve(getDynamicSizes().size());
+  for (Value dynamicSizeProducerHandle : getDynamicSizes()) {
+    dynamicSizeProducers.push_back(
+        state.getPayloadOps(dynamicSizeProducerHandle));
+
+    if (dynamicSizeProducers.back().size() != targets.size()) {
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError()
+          << "expected as many dynamic size-producing operations ("
+          << dynamicSizeProducers.back().size() << ") as target ops ("
+          << targets.size() << ")";
+      diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+      return diag;
+    }
+
+    for (Operation *op : dynamicSizeProducers.back()) {
+      if (op->getNumResults() == 1 &&
+          op->getResult(0).getType().isa<IndexType>())
+        continue;
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError() << "expected sizes to be produced by ops "
+                                    "with a single index-type result";
+      diag.attachNote(op->getLoc()) << "size producer op";
+      diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+      return diag;
+    }
+  }
+
+  SmallVector<Operation *> tiled;
+  SmallVector<SmallVector<Operation *>, 4> loops;
+  loops.resize(getLoops().size());
+  for (auto &en : llvm::enumerate(targets)) {
+    auto tilingInterfaceOp = dyn_cast<TilingInterface>(en.value());
+    if (!tilingInterfaceOp) {
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError() << "only TilingInterface ops are supported";
+      diag.attachNote(en.value()->getLoc()) << "target op";
+      return diag;
+    }
+
+    scf::SCFTilingOptions tilingOptions;
+    unsigned index = en.index();
+    if (!tileSizes.empty()) {
+      tilingOptions.setTileSizeComputationFunction(
+          [&, index](OpBuilder &b, Operation *) {
+            SmallVector<Value> sizes;
+            sizes.reserve(tileSizes.size());
+            unsigned dynamicIdx = 0;
+            for (OpFoldResult ofr : getMixedSizes()) {
+              if (auto attr = ofr.dyn_cast<Attribute>()) {
+                sizes.push_back(b.create<arith::ConstantIndexOp>(
+                    getLoc(), attr.cast<IntegerAttr>().getInt()));
+              } else {
+                sizes.push_back(
+                    dynamicSizeProducers[dynamicIdx++][index]->getResult(0));
+              }
+            }
+            return sizes;
+          });
+    }
+
+    tilingOptions.setInterchange(extractI64Array(getInterchange()));
+    SimpleRewriter rewriter(tilingInterfaceOp.getContext());
+    FailureOr<scf::SCFTilingResult> tilingResult =
+        tileUsingSCFForOp(rewriter, tilingInterfaceOp, tilingOptions);
+    if (failed(tilingResult))
+      return DiagnosedSilenceableFailure::definiteFailure();
+
+    rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements);
+
+    tiled.push_back(tilingResult->tiledOp);
+    for (const auto &en2 : llvm::enumerate(tilingResult->loops))
+      loops[en2.index()].push_back(en2.value());
+  }
+
+  transformResults.set(getTiledLinalgOp().cast<OpResult>(), tiled);
+  for (const auto &en : llvm::enumerate(loops))
+    transformResults.set(getLoops()[en.index()].cast<OpResult>(), en.value());
+
+  return DiagnosedSilenceableFailure::success();
+}
+
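+/// For example, with `static_sizes = [4, kDynamicSize, 0]` and a single
+/// operand in `dynamic_sizes`, `getMixedSizes` returns the mixed list
+/// {IndexAttr(4), dynamic_sizes[0], IndexAttr(0)}.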
+SmallVector<OpFoldResult> transform::TileToScfForOp::getMixedSizes() {
+  ValueRange dynamic = getDynamicSizes();
+  SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+  SmallVector<OpFoldResult> results;
+  results.reserve(tileSizes.size());
+  unsigned dynamicPos = 0;
+  Builder builder(getContext());
+  for (int64_t size : tileSizes) {
+    if (size == ShapedType::kDynamicSize) {
+      results.push_back(dynamic[dynamicPos++]);
+    } else {
+      results.push_back(builder.getIndexAttr(size));
+    }
+  }
+  return results;
+}
+
+ParseResult transform::TileToScfForOp::parse(OpAsmParser &parser,
+                                             OperationState &result) {
+  OpAsmParser::UnresolvedOperand target;
+  SmallVector<OpAsmParser::UnresolvedOperand> dynamicSizes;
+  ArrayAttr staticSizes;
+  auto pdlOperationType = pdl::OperationType::get(parser.getContext());
+  if (parser.parseOperand(target) ||
+      parser.resolveOperand(target, pdlOperationType, result.operands) ||
+      parseDynamicIndexList(parser, dynamicSizes, staticSizes,
+                            ShapedType::kDynamicSize) ||
+      parser.resolveOperands(dynamicSizes, pdlOperationType,
+                             result.operands) ||
+      parser.parseOptionalAttrDict(result.attributes))
+    return ParseResult::failure();
+
+  result.addAttribute(getStaticSizesAttrName(result.name), staticSizes);
+  size_t numExpectedLoops =
+      staticSizes.size() - llvm::count(extractFromI64ArrayAttr(staticSizes), 0);
+  result.addTypes(SmallVector<Type>(numExpectedLoops + 1, pdlOperationType));
+  return success();
+}
+
+void TileToScfForOp::print(OpAsmPrinter &p) {
+  p << ' ' << getTarget();
+  printDynamicIndexList(p, getOperation(), getDynamicSizes(), getStaticSizes(),
+                        ShapedType::kDynamicSize);
+  p.printOptionalAttrDict((*this)->getAttrs(), {getStaticSizesAttrName()});
+}
+
+void transform::TileToScfForOp::getEffects(
+    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+  consumesHandle(getTarget(), effects);
+  onlyReadsHandle(getDynamicSizes(), effects);
+  producesHandle(getTiledLinalgOp(), effects);
+  producesHandle(getLoops(), effects);
+  modifiesPayload(effects);
+}
+
 //===----------------------------------------------------------------------===//
 // VectorizeOp
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -51,40 +51,6 @@
 
 namespace {
 
-/// Configurable pass to apply pattern-based linalg tiling.
-struct LinalgStrategyTilePass - : public impl::LinalgStrategyTilePassBase { - - LinalgStrategyTilePass() = default; - - LinalgStrategyTilePass(StringRef opName, - mlir::linalg::LinalgTilingOptions opt, - LinalgTransformationFilter filt) - : options(std::move(opt)), filter(std::move(filt)) { - this->anchorOpName.setValue(opName.str()); - } - - void runOnOperation() override { - auto funcOp = getOperation(); - if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName) - return; - - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet tilingPattern(ctx); - if (!anchorOpName.empty()) - tilingPattern.add(anchorOpName, ctx, options, - filter); - else - tilingPattern.add(ctx, options, filter); - if (anchorOpName == tensor::PadOp::getOperationName()) - populatePadTensorTilingPatterns(tilingPattern, options); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); - } - - mlir::linalg::LinalgTilingOptions options; - LinalgTransformationFilter filter; -}; - /// Configurable pass to lower vector operations. struct LinalgStrategyRemoveMarkersPass : public impl::LinalgStrategyRemoveMarkersPassBase< @@ -101,14 +67,6 @@ }; } // namespace -/// Create a LinalgStrategyTilePass. -std::unique_ptr> -mlir::createLinalgStrategyTilePass(StringRef opName, - const LinalgTilingOptions &opt, - const LinalgTransformationFilter &filter) { - return std::make_unique(opName, opt, filter); -} - /// Create a LinalgStrategyRemoveMarkersPass. std::unique_ptr> mlir::createLinalgStrategyRemoveMarkersPass() { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -732,77 +732,8 @@ >::insert(patterns); } -/// Populate the given list with patterns that apply Linalg tiling. 
-static void insertTilingPatterns(RewritePatternSet &patterns, - const LinalgTilingOptions &options) { - auto *ctx = patterns.getContext(); - LinalgTransformationFilter f(ArrayRef{}, - StringAttr::get(ctx, "tiled")); - TilingPatterns::insert(patterns, options, f); - patterns.add(ctx, options); -} - void mlir::linalg::populatePadTensorTilingPatterns( RewritePatternSet &patterns, const LinalgTilingOptions &options) { auto *ctx = patterns.getContext(); patterns.add(ctx, options); } - -static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) { - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - patterns.add(patterns.getContext()); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); -} - -namespace { -struct LinalgTilingPass : public impl::LinalgTilingPassBase { - LinalgTilingPass() = default; - LinalgTilingPass(ArrayRef tileSizes, LinalgTilingLoopType loopType) { - this->tileSizes = tileSizes; - this->loopType = ""; - this->loopTypeEnum = loopType; - } - - void runOnOperation() override { - func::FuncOp funcOp = getOperation(); - LinalgTilingLoopType type = - llvm::StringSwitch(loopType) - .Case("for", LinalgTilingLoopType::Loops) - .Case("affine", LinalgTilingLoopType::AffineLoops) - .Case("parallel", LinalgTilingLoopType::ParallelLoops) - .Default(loopTypeEnum); - auto options = - LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(type); - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - insertTilingPatterns(patterns, options); - scf::populateSCFForLoopCanonicalizationPatterns(patterns); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); - // Drop the marker. - funcOp.walk([](LinalgOp op) { - op->removeAttr(LinalgTransforms::kLinalgTransformMarker); - }); - - // Apply swap pattern after generating loop nest and running - // canonicalizations. - applyExtractSliceOfPadTensorSwapPattern(funcOp); - } - - LinalgTilingLoopType loopTypeEnum; -}; - -} // namespace - -std::unique_ptr> -mlir::createLinalgTilingPass(ArrayRef tileSizes, - linalg::LinalgTilingLoopType loopType) { - return std::make_unique(tileSizes, loopType); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -356,33 +356,13 @@ } } -/// Linalg tiling pattern. 
-mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(std::move(f)), options(std::move(options)) {} - -mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(f.addOpNameFilter(opName)), options(std::move(options)) {} - FailureOr -mlir::linalg::LinalgTilingPattern::returningMatchAndRewrite( - LinalgOp op, PatternRewriter &rewriter) const { - if (failed(filter.checkAndNotify(rewriter, op))) - return failure(); - +mlir::linalg::tileAndX(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options) { FailureOr res = tileLinalgOp(rewriter, op, options); if (failed(res)) return failure(); - // Clear filter to stop recursive pattern application. - // This must be done here to properly propagate to peeling branches. - filter.replaceLinalgTransformationFilter(rewriter, res->op); - // Peel the loops of the TiledLinalgOp. peelTiledLinalgOp(rewriter, *res, options.peeledLoops, options.loopType); diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ /dev/null @@ -1,219 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file | FileCheck %s - -func.func @gemm1(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute1"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm1( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm2(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute2"} - ins(%a, %b: memref, memref) - outs(%c:memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm2( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDSY:.*]] = arith.cmpi slt, %[[ITERY]], %{{.*}} -// CHECK: %[[INBOUNDSX:.*]] = arith.cmpi slt, %[[ITERX]], %{{.*}} -// CHECK: %[[INBOUNDS:.*]] = arith.andi %[[INBOUNDSY]], 
%[[INBOUNDSX]] -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm3(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute3"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm3( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm4(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute4"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm4( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBX]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm5(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute5"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm5( -// CHECK-SAME: 
%[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBY]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm6(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute6"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm6( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> -// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> -// CHECK: func @matmul_tensors( -// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor) -> tensor { -func.func @matmul_tensors( - %arg0: tensor, %arg1: tensor, %arg2: tensor) - -> tensor { -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]] -// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]] -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]] -// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPX:.+]] = affine.apply 
#[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]] -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { -// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor -// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor -// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) -// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor -// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor -// CHECK: scf.yield %[[TD]] : tensor -// CHECK: scf.yield %[[TD2]] : tensor -// CHECK: scf.yield %[[TD1]] : tensor - %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"} - ins(%arg0, %arg1: tensor, tensor) - outs(%arg2: tensor) - -> tensor - -// CHECK: return %[[TD0]] : tensor - return %0 : tensor -} - diff --git a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir +++ /dev/null @@ -1,110 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=0" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-0 - -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=1,2" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-12 - -// CHECK-PEEL-0: func @matmul_static_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0-DAG: %[[c1280:.*]] = arith.constant 1280 : index -// CHECK-PEEL-0-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-0-DAG: %[[c1700:.*]] = arith.constant 1700 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1280]] step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<220x?xf32>, tensor) outs({{.*}} : tensor<220x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_static_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12-DAG: %[[c1500:.*]] = arith.constant 1500 : index -// CHECK-PEEL-12-DAG: %[[c1536:.*]] = arith.constant 1536 : index -// CHECK-PEEL-12-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-12-DAG: %[[c1664:.*]] = arith.constant 1664 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1500]] step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for 
%{{.*}} = %[[c0]] to %[[c1664]] step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1536]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<64x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>) - -> tensor<1500x1700xf32> { - %out = tensor.empty() : tensor<1500x1700xf32> - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>) - outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32> - return %r : tensor<1500x1700xf32> -} - -// ----- - -// CHECK-PEEL-0: func @matmul_dynamic_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_dynamic_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %d1 = tensor.dim %arg1, %c1 : tensor - %out = tensor.empty(%d0, %d1) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} diff --git 
a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)> @@ -10,6 +10,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [2, 3] +} + // CHECK: func @conv // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir --- a/mlir/test/Dialect/Linalg/tile-indexed.mlir +++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir @@ -1,6 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=10,25" -split-input-file | FileCheck %s -check-prefix=TILE-10n25 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=25,0" -split-input-file | FileCheck %s -check-prefix=TILE-25n0 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,25" -split-input-file | FileCheck %s -check-prefix=TILE-0n25 +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -split-input-file | FileCheck %s -check-prefix=TILE-10n25 func.func @indexed_vector(%arg0: memref<50xindex>) { linalg.generic {indexing_maps = [affine_map<(i) -> (i)>], @@ -12,6 +10,13 @@ } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_vector // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index @@ -21,19 +26,6 @@ // TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) // TILE-10n25: linalg.yield %[[NEW_I]] : index -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_vector -// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) -// TILE-25n0: linalg.yield %[[NEW_I]] : index - -// TILE-0n25-LABEL: func @indexed_vector -// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step % -// TILE-0n25: linalg.generic - // ----- func.func @indexed_matrix(%arg0: memref<50x50xindex>) { @@ -48,6 +40,13 @@ } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_matrix // TILE-10n25-DAG: %[[C25:.*]] = arith.constant 25 : index @@ -61,25 +60,3 @@ // TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) // TILE-10n25: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index // TILE-10n25: linalg.yield %[[SUM]] : index - -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_matrix -// TILE-25n0: 
%[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[L]]) -// TILE-25n0: %[[J:.*]] = linalg.index 1 : index -// TILE-25n0: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[J]] : index -// TILE-25n0: linalg.yield %[[SUM]] : index - -// TILE-0n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-0n25-LABEL: func @indexed_matrix -// TILE-0n25: %[[C25:.*]] = arith.constant 25 : index -// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-0n25: linalg.generic -// TILE-0n25: %[[I:.*]] = linalg.index 0 : index -// TILE-0n25: %[[J:.*]] = linalg.index 1 : index -// TILE-0n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) -// TILE-0n25: %[[SUM:.*]] = arith.addi %[[I]], %[[NEW_J]] : index -// TILE-0n25: linalg.yield %[[SUM]] : index diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir --- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir +++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir @@ -1,53 +1,65 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE1 -// This test only checks that tiling does not crash. -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -resolve-shaped-type-result-dims -cse -split-input-file - -// TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE2: func @dynamic_pad_tensor( -// TILE2-SAME: %[[IN:.*]]: tensor -// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE2-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE2: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] -// TILE2: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] -// TILE2: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE2: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] -// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] -// TILE2: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE2: %[[SWAP_RESULT:.*]] = scf.if -// TILE2: tensor.generate -// TILE2: else -// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] -// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: return %[[RESULT]] - -// TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE1: func @dynamic_pad_tensor( -// TILE1-SAME: %[[IN:.*]]: tensor -// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE1-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] -// TILE1: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] -// TILE1: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] -// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE1: %[[SWAP_RESULT:.*]] = 
scf.if -// TILE1: tensor.generate -// TILE1: else -// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] -// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] -// TILE1: return %[[RESULT]] - -func.func @dynamic_pad_tensor(%input_tensor: tensor, +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -cse -split-input-file + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK: func @dynamic_pad_tensor_3_4( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] +// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor, + %pad_value: f32) -> tensor { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor to tensor + return %0 : tensor +} + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3] +} + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK: func @dynamic_pad_tensor_0_3( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor, %pad_value: f32) -> tensor { %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): @@ -56,41 +68,64 @@ return %0 : tensor } 
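+// A static tile size of 0 leaves the first dimension untiled, so the
+// sequence below generates a single loop and yields a single loop handle.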
+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
+// -----
+
+// CHECK-LABEL: func @static_pad_tensor_3_4(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
+// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
+                                 %pad_value: f32) -> tensor<15x16xf32> {
+  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+    } : tensor<7x9xf32> to tensor<15x16xf32>
+  return %0 : tensor<15x16xf32>
+}
+
+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3]
+}
+
 // -----

-// TILE2-LABEL: func @static_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE2-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE2-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
-// TILE2: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE2: %[[SWAP_RESULT:.*]] = scf.if
-// TILE2: tensor.generate
-// TILE2: else
-// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: return %[[RESULT]]
-
-
-// TILE1-LABEL: func @static_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[SWAP_RESULT:.*]] = scf.if
-// TILE1: tensor.generate
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
-// TILE1: return %[[RESULT]]
-
-func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tensor_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor<?x?xf32> to tensor<15x?xf32>
+// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
                                  %pad_value: f32) -> tensor<15x16xf32> {
   %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
@@ -99,25 +134,35 @@
   return %0 : tensor<15x16xf32>
 }

+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
 // -----

-// TILE1-LABEL: func @static_pad_tile_evenly(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE1: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[R2:.*]] = scf.if
-// TILE1: %[[GEN:.*]] = tensor.generate
-// TILE1: scf.yield %[[GEN]] : tensor<14x3xf32>
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// TILE1: scf.yield %[[PAD]] : tensor<14x3xf32>
-// TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
-// TILE1: scf.yield %[[R3]] : tensor<14x15xf32>
-// TILE1: return %[[RESULT]] : tensor<14x15xf32>
-func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tile_evenly_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[R2:.*]] = scf.if
+// CHECK: %[[GEN:.*]] = tensor.generate
+// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_0]] : tensor<?x?xf32>
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_1]] : tensor<?x?xf32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor<?x?xf32> to tensor<14x3xf32>
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
+// CHECK: return %[[RESULT]] : tensor<14x15xf32>
+
+func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>,
                                       %output_tensor: tensor<14x15xf32>,
                                       %pad_value: f32) -> tensor<14x15xf32> {
   %0 =
tensor.pad %input_tensor low[0, 0] high[7, 6] { @@ -126,3 +171,9 @@ } : tensor<7x9xf32> to tensor<14x15xf32> return %0 : tensor<14x15xf32> } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3] +} diff --git a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir +++ /dev/null @@ -1,113 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4,8 loop-type=parallel" -split-input-file | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE1 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE2 - -func.func @gemm(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.matmul ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @gemm -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C2]], %[[C4]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// TILE1-LABEL: func @gemm -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.parallel (%[[ARG3:.*]]) = -// TILE1-SAME: step (%[[C2]]) -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.matmul ins(%[[SV1]], %{{.*}} outs(%[[SV3]] - -// TILE2-LABEL: func @gemm -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C2]], %[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[0, %[[ARG4]]] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// TILE2: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> -#map2 = affine_map<(d0, d1, d2) -> (d1)> -#accesses = [#map0, #map1, #map2] -#trait = { - args_in = 2 : i64, - args_out = 1 : i64, - iterator_types = ["reduction", "parallel", "reduction"], - indexing_maps = #accesses -} - -func.func @reduction(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.generic #trait - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - %1 = arith.addf %0, %arg5 : f32 - linalg.yield %1 : f32 - } - return -} - -// CHECK-LABEL: func @reduction -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK-SAME: step %[[C2]] -// CHECK: scf.parallel (%[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C4]]) -// 
CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// CHECK: linalg.generic -// CHECK-SAME: ins(%[[SV1]], %[[SV2]] -// CHECK-SAME: outs(%[[SV3]] - -// TILE1-LABEL: func @reduction -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.for %[[ARG3:.*]] = -// TILE1-SAME: step %[[C2]] -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0, 0] -// TILE1: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.generic -// TILE1-SAME: ins(%[[SV1]], %[[SV2]] -// TILE1-SAME: outs(%{{.*}} - -// TILE2-LABEL: func @reduction -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.for %[[ARG3:.*]] = -// TILE2-SAME: step %[[C2]] -// TILE2: scf.parallel (%[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// TILE2: linalg.generic -// TILE2-SAME: ins(%[[SV1]], %[[SV2]] -// TILE2-SAME: outs(%[[SV3]] diff --git a/mlir/test/Dialect/Linalg/tile-parallel.mlir b/mlir/test/Dialect/Linalg/tile-parallel.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-parallel.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4 loop-type=parallel" | FileCheck %s -check-prefix=TILE-234 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @sum(%lhs: memref>, - %rhs: memref>, - %sum: memref>) { - linalg.generic #pointwise_2d_trait - ins(%lhs, %rhs: memref>, - memref>) - outs(%sum : memref>) { - ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): - %result = arith.addf %lhs_in, %rhs_in : f32 - linalg.yield %result : f32 - } - return -} -// TILE-2-LABEL: func @sum( -// TILE-2-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-2-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-2-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-2: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) { -// TILE-2-NO: scf.parallel -// TILE-2: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-2: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-2: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-2: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-02-LABEL: func @sum( -// TILE-02-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-02-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-02-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-02: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) { -// TILE-02-NO: scf.parallel 
-// TILE-02: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-02: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-02: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-02: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-002-LABEL: func @sum( -// TILE-002-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-002-NO: scf.parallel -// TILE-002: linalg.generic {{.*}} ins([[LHS]], [[RHS]]{{.*}} outs([[SUM]] - -// TILE-234-LABEL: func @sum( -// TILE-234-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-234-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-234-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-234-DAG: [[C3:%.*]] = arith.constant 3 : index -// TILE-234: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-234: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) { -// TILE-234-NO: scf.parallel -// TILE-234: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-234: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-234: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-234: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-scalarize-dynamic-dims" -scf-for-loop-canonicalization -canonicalize -split-input-file | \ -// RUN: FileCheck %s - -// CHECK-LABEL: func @matmul_partly_dynamic_tensor( -// CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK: tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB1:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB2:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor -// CHECK: scf.for %[[IV0:.*]] = %[[C0]] to %[[UB1]] step %[[C1]] -// CHECK: scf.for %[[IV1:.*]] = %[[C0]] to %[[UB2]] step %[[C1]] -// CHECK: %[[S1:.*]] = tensor.extract_slice %[[ARG0]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] : tensor to tensor<1x1xf32> -// CHECK: %[[S2:.*]] = tensor.extract_slice %[[ARG1]][%[[IV1]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: %[[S3:.*]] = tensor.extract_slice %{{.*}}[%[[IV0]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: linalg.matmul ins(%[[S1]], %[[S2]] : tensor<1x1xf32>, tensor<1x2000xf32>) outs(%[[S3]] : tensor<1x2000xf32>) -> tensor<1x2000xf32> -func.func @matmul_partly_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %out = tensor.empty(%d0) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} - -// ----- - -// The input IR of this test case is a tiled and peeled linalg.matmul op. 
- -// CHECK-LABEL: func @tiled_and_peeled_matmul( -// CHECK: linalg.matmul ins({{.*}} : tensor<32x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<32x258xf32>) -> tensor<32x258xf32> -// CHECK: linalg.matmul ins({{.*}} : tensor<1x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<1x258xf32>) -> tensor<1x258xf32> -#map0 = affine_map<(d0) -> (64, -d0 + 257)> -#map1 = affine_map<()[s0] -> ((s0 floordiv 32) * 32)> -#map2 = affine_map<(d0)[s0] -> (d0 - (s0 floordiv 32) * 32)> - -func.func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258xf32>, %arg2: tensor<257x258xf32>) -> tensor<257x258xf32> { - %c257 = arith.constant 257 : index - %c64 = arith.constant 64 : index - %cst = arith.constant 0.000000e+00 : f32 - %c0 = arith.constant 0 : index - %c32 = arith.constant 32 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<257x258xf32>) -> tensor<257x258xf32> - %1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) { - %2 = affine.min #map0(%arg3) - %3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor - %4 = tensor.extract_slice %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<257x258xf32> to tensor - %5 = affine.apply #map1()[%2] - %6 = scf.for %arg5 = %c0 to %5 step %c32 iter_args(%arg6 = %4) -> (tensor) { - %10 = tensor.extract_slice %3[%arg5, 0] [32, 259] [1, 1] : tensor to tensor<32x259xf32> - %11 = tensor.extract_slice %arg6[%arg5, 0] [32, 258] [1, 1] : tensor to tensor<32x258xf32> - %12 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%10, %arg1 : tensor<32x259xf32>, tensor<259x258xf32>) outs(%11 : tensor<32x258xf32>) -> tensor<32x258xf32> - %13 = tensor.insert_slice %12 into %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<32x258xf32> into tensor - scf.yield %13 : tensor - } - %7 = arith.cmpi slt, %5, %2 : index - %8 = scf.if %7 -> (tensor) { - %10 = affine.apply #map2(%2)[%2] - %11 = tensor.extract_slice %3[%5, 0] [%10, 259] [1, 1] : tensor to tensor - %12 = tensor.extract_slice %6[%5, 0] [%10, 258] [1, 1] : tensor to tensor - %13 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%11, %arg1 : tensor, tensor<259x258xf32>) outs(%12 : tensor) -> tensor - %14 = tensor.insert_slice %13 into %6[%5, 0] [%10, 258] [1, 1] : tensor into tensor - scf.yield %14 : tensor - } else { - scf.yield %6 : tensor - } - %9 = tensor.insert_slice %8 into %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor into tensor<257x258xf32> - scf.yield %9 : tensor<257x258xf32> - } - return %1 : tensor<257x258xf32> -} diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors( // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor @@ -27,6 +27,12 @@ return %0 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // ----- func.func @generic_op_tensors( @@ -52,6 +58,12 @@ return %4 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // CHECK-LABEL: func 
@generic_op_tensors // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor @@ -117,3 +129,8 @@ return %2 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} diff --git a/mlir/test/Dialect/Linalg/tile-zero.mlir b/mlir/test/Dialect/Linalg/tile-zero.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-zero.mlir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: mlir-opt -test-linalg-transform-patterns=test-tile-pattern %s | FileCheck %s - -func.func @matmul_zero_tile( - %arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor - return %0 : tensor -} -// CHECK-LABEL: matmul_zero_tile -// CHECK: linalg.matmul -// CHECK-NOT: __internal_linalg_transform__ diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile.mlir +++ /dev/null @@ -1,331 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-234 - -// TILE-2-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-02-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-002-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)> -// TILE-234-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> - -func.func @matmul(%arg0: memref>, - %arg1: memref>, - %arg2: memref>) { - linalg.matmul - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) - return -} -// TILE-2-LABEL: func @matmul( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[N:.*]] = memref.dim %arg1, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]] -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szK:.*]] 
= affine.min #[[$bound_map]](%[[J]])[%[[N]]]
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
-
-// TILE-002-LABEL: func @matmul(
-// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
-
-// TILE-234-LABEL: func @matmul(
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE-234: %[[ubM:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubN:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-//
-// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
-
-// When the buffer shapes are known at compile time, it is possible to avoid
-// the "min" in subview size computation. This test uses buffer sizes divisible
-// by respective tile sizes (M=10 divisible by 2, N=12 divisible by 2 and 3,
-// K=16 divisible by 2 and 4).
-func.func @matmul_static(%arg0: memref<10x16xf32, strided<[?, 1], offset: ?>>, - %arg1: memref<16x12xf32, strided<[?, 1], offset: ?>>, - %arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) { - linalg.matmul - ins(%arg0, %arg1: memref<10x16xf32, strided<[?, 1], offset: ?>>, - memref<16x12xf32, strided<[?, 1], offset: ?>>) - outs(%arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) - return -} -// TILE-2-LABEL: func @matmul_static( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2-DAG: %[[M:.*]] = arith.constant 10 : index -// TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 16] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x16xf32, strided<[?, 1], offset: ?>> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 12] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-2: linalg.matmul ins(%[[sAi]], %{{.*}}{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul_static( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02-DAG: %[[N:.*]] = arith.constant 12 : index -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [16, 2] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<16x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [10, 2] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] - -// TILE-002-LABEL: func @matmul_static( -// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-002-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [10, 2] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [2, 12] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-234-LABEL: func @matmul_static( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE-234-DAG: %[[C10:.*]] = arith.constant 10 : index -// TILE-234-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-234-DAG: %[[C12:.*]] = arith.constant 12 : index -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} { -// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [2, 4] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x4xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [4, 3] [1, 1] : memref<16x12xf32, 
strided<[?, 1], offset: ?>> to memref<4x3xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [2, 3] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x3xf32, strided<[?, 1], offset: ?>> -// -// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] - -func.func @matvec(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - linalg.matvec - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) - return -} -// TILE-2-LABEL: func @matvec( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref> to memref> -// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matvec( -// TILE-02-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-002-LABEL: func @matvec( -// TILE-002-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @matvec( -// TILE-234-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-234: %[[sBj:.*]] = memref.subview 
%{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// -// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]] - -func.func @dot(%arg0: memref>, %arg1: memref>, %arg2: memref) { - linalg.dot - ins(%arg0, %arg1: memref>, memref>) - outs(%arg2: memref) - return -} -// TILE-2-LABEL: func @dot( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -// TILE-02-LABEL: func @dot( -// TILE-02-NOT: scf.for - -// TILE-002-LABEL: func @dot( -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @dot( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<127x99xf32>) - return -} -// TILE-2-LABEL: func @fill_static -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: memref.subview{{.*}} : memref<127x99xf32> -// TILE-2: linalg.fill{{.*}} : memref> - -// TILE-02-LABEL: func @fill_static -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: memref.subview{{.*}} : memref<127x99xf32> -// TILE-02: linalg.fill{{.*}} : memref<127x?xf32, strided<[99, 1], offset: ?>> - -// TILE-002-LABEL: func @fill_static -// TILE-002-NOT: for -// TILE-002: linalg.fill{{.*}} : memref<127x99xf32> - -// TILE-234-LABEL: func @fill_static -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: memref.subview{{.*}} : memref<127x99xf32> -// TILE-234: linalg.fill{{.*}} : memref> - - -func.func @fill(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) - return -} -// TILE-2-LABEL: func @fill -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: fill{{.*}} f32 - -// TILE-02-LABEL: func @fill -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: fill{{.*}} f32 - -// TILE-002-LABEL: func @fill -// TILE-002-NOT: for -// TILE-002: fill{{.*}} f32 - -// TILE-234-LABEL: func @fill -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: fill{{.*}} f32 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @pointwise(%arg0: memref>, %arg1: memref>, - %arg2: memref>) { - linalg.generic #pointwise_2d_trait - ins(%arg0, %arg1 : memref>, memref>) - outs(%arg2 : memref>) { - ^bb0(%arg4: f32, 
%arg5: f32, %arg6: f32): - %4 = arith.addf %arg4, %arg5 : f32 - linalg.yield %4 : f32 - } - return -} -// TILE-2-LABEL: func @pointwise -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: linalg.generic - -// TILE-02-LABEL: func @pointwise -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: linalg.generic - -// TILE-002-LABEL: func @pointwise -// TILE-002-NOT: for -// TILE-002: linalg.generic - -// TILE-234-LABEL: func @pointwise -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: linalg.generic diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -1,20 +1,22 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-patterns -split-input-file -test-transform-dialect-interpreter | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-linalg-transform-patterns=test-patterns -split-input-file | FileCheck %s -// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. -// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> -// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> -// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> +// ----- func.func @dot(%x: memref>, %y: memref>, %v: memref) { - linalg.dot { __internal_linalg_transform__ = "MEM" } - ins(%x, %y: memref>, - memref>) - outs(%v: memref) - + linalg.dot ins(%x, %y: memref>, + memref>) + outs(%v: memref) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.dot"]} in %arg1 + %1, %loop = transform.structured.tile %0 [8000] +} + // CHECK-LABEL: func @dot // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index @@ -28,6 +30,8 @@ // CHECK: arith.addf // CHECK: store +// ----- + func.func @matvec(%A: memref>, %x: memref>, %y: memref>) { @@ -37,25 +41,43 @@ outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] +} + // CHECK-LABEL: func @matvec // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[c6:.*]] = arith.constant 6 : index -// CHECK: scf.parallel {{.*}} step (%[[c5]]) +// CHECK: scf.for {{.*}} step %[[c5]] // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul { __internal_linalg_transform__ = "MEM" } - ins(%A, %B: memref>, - memref>) - outs(%C: memref>) + linalg.matmul ins(%A, %B: memref>, + memref>) + outs(%C: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] + %4, %loops_4:3 = transform.structured.tile %3 [2, 3, 4] +} + // CHECK-LABEL: func @matmul // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index @@ -86,6 +108,13 @@ // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// 
----- + +// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. +// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> +// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> +// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> + #matmul_accesses = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, @@ -112,6 +141,7 @@ } return } + transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): transform.sequence %arg0 failures(propagate) { @@ -120,6 +150,7 @@ transform.structured.interchange %0 { iterator_interchange = [1, 2, 0]} } } + // CHECK-LABEL: func @permute_generic // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = [#[[$kn]], #[[$nm]], #[[$km]]], @@ -129,15 +160,23 @@ // CHECK-SAME: memref> // CHECK-SAME: memref> +// ----- + func.func @matvec_perm(%A: memref>, %x: memref>, %y: memref>) { - linalg.matvec {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %x: memref>, - memref>) - outs(%y: memref>) + linalg.matvec ins(%A, %x: memref>, + memref>) + outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] {interchange = [1, 0]} +} + // CHECK-LABEL: func @matvec_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index @@ -148,15 +187,25 @@ // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul_perm(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %B: memref>, - memref>) - outs(%C : memref>) + linalg.matmul ins(%A, %B: memref>, + memref>) + outs(%C : memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] {interchange=[1, 2, 0]} + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] {interchange=[1, 0, 2]} + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] +} + // CHECK-LABEL: func @matmul_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c20:.*]] = arith.constant 20 : index @@ -180,26 +229,3 @@ // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) - -func.func @tile_permute_parallel_loop(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.matmul {__internal_linalg_transform__ = "par__with_perm__"} - ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @tile_permute_parallel_loop -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[D0:.*]] = memref.dim %[[ARG0]], %c0 -// CHECK-DAG: %[[D1:.*]] = memref.dim %[[ARG0]], %c1 -// CHECK-DAG: %[[D2:.*]] = memref.dim %[[ARG1]], %c1 -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]]) -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]] -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]]) diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1 + %1, %loop = transform.structured.tile %0 [4] +} + func.func @main() { %c3 = arith.constant 3 : index %c6 = arith.constant 6 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 4] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1 + %1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,11 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5] +} func.func @main() { %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -1,12 +1,12 @@ // UNSUPPORTED: asan -// RUN: mlir-opt %s -linalg-bufferize -arith-bufferize \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \ // RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=1,2,3" -linalg-bufferize \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -linalg-bufferize \ // RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \ // RUN: -func-bufferize \ // RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \ @@ -36,4 +36,10 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [1, 2, 3] +} + func.func private @printMemrefF32(%ptr : tensor<*xf32>) diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -61,10 +61,6 @@ Option testPatterns{*this, "test-patterns", llvm::cl::desc("Test a mixed set of patterns"), llvm::cl::init(false)}; - Option testTileAndDistributionOptions{ - *this, "test-tile-and-distribute-options", - llvm::cl::desc("Test tile and distribute options"), - llvm::cl::init(false)}; Option testVectorTransferForwardingPatterns{ *this, "test-vector-transfer-forwarding-patterns", llvm::cl::desc( @@ -75,13 +71,6 @@ llvm::cl::desc("Test a set of patterns that rewrite a linalg contraction " "in vector.contract form"), llvm::cl::init(false)}; - Option testTilePattern{*this, "test-tile-pattern", - llvm::cl::desc("Test tile pattern"), - llvm::cl::init(false)}; - Option testTileScalarizeDynamicDims{ - *this, "test-tile-scalarize-dynamic-dims", - llvm::cl::desc("Test tiling of dynamic dims by 1"), - llvm::cl::init(false)}; Option testTransformPadTensor{ *this, "test-transform-pad-tensor", llvm::cl::desc("Test transform pad tensor by copying with generic ops"), @@ -135,91 +124,12 @@ MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(ctx); - //===--------------------------------------------------------------------===// - // Linalg tiling patterns. 
-  //===--------------------------------------------------------------------===//
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({2000, 3000, 4000}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "MEM"),
-                                 StringAttr::get(ctx, "L3")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({200, 300, 400}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L3"),
-                                 StringAttr::get(ctx, "L2")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({20, 30, 40}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L2"),
-                                 StringAttr::get(ctx, "L1")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({2, 3, 4}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L1"),
-                                 StringAttr::get(ctx, "REG")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatvecOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({5, 6}).setLoopType(
-          LinalgTilingLoopType::ParallelLoops),
-      LinalgTransformationFilter(ArrayRef<StringAttr>{},
-                                 StringAttr::get(ctx, "L1")));
-
-  patterns.add<LinalgTilingPattern>(
-      DotOp::getOperationName(), ctx, LinalgTilingOptions().setTileSizes(8000),
-      LinalgTransformationFilter(
-          ArrayRef<StringAttr>{StringAttr::get(ctx, "MEM"),
-                               StringAttr::get(ctx, "L3"),
-                               StringAttr::get(ctx, "L2")},
-          StringAttr::get(ctx, "REG")));
-
-  //===--------------------------------------------------------------------===//
-  // Linalg tiling and permutation patterns.
-  //===--------------------------------------------------------------------===//
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({2000, 3000, 4000})
-          .setInterchange({1, 2, 0}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
-                                 StringAttr::get(ctx, "L2__with_perm__")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({200, 300, 400})
-          .setInterchange({1, 0, 2}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L2__with_perm__"),
-                                 StringAttr::get(ctx, "L1__with_perm__")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({20, 30, 40}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L1__with_perm__"),
-                                 StringAttr::get(ctx, "REG__with_perm__")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatvecOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
-                                 StringAttr::get(ctx, "L1__with_perm__")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({16, 8, 4})
-          .setInterchange({1, 2, 0})
-          .setLoopType(LinalgTilingLoopType::ParallelLoops),
-      LinalgTransformationFilter(
-          StringAttr::get(ctx, "par__with_perm__"),
-          StringAttr::get(ctx, "after_par__with_perm__")));
-
   //===--------------------------------------------------------------------===//
   // Linalg to loops patterns.
   //===--------------------------------------------------------------------===//
   patterns.add<LinalgLoweringPattern<GenericOp>>(
       ctx,
-      /*loweringType=*/LinalgLoweringType::Loops,
-      LinalgTransformationFilter(StringAttr::get(ctx, "REG")));
+      /*loweringType=*/LinalgLoweringType::Loops);
 
   //===--------------------------------------------------------------------===//
   // Linalg distribution patterns.
@@ -239,178 +149,6 @@
   });
 }
 
-template <typename IdOp, typename NProcsOp>
-static SmallVector<ProcInfo>
-getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges,
-              ArrayRef<DistributionMethod> distributionMethod) {
-  size_t count = std::min<size_t>(3, parallelLoopRanges.size());
-  SmallVector<ProcInfo> procInfo(count);
-  Type indexType = b.getIndexType();
-  for (unsigned i = 0; i < count; ++i) {
-    gpu::Dimension dim = *gpu::symbolizeDimension(i);
-    procInfo[count - 1 - i] = {b.create<IdOp>(loc, indexType, dim),
-                               b.create<NProcsOp>(loc, indexType, dim),
-                               distributionMethod[count - 1 - i]};
-  }
-  return procInfo;
-}
-
-static void fillTileAndDistributePatterns(MLIRContext *context,
-                                          RewritePatternSet &patterns) {
-  {
-    LinalgLoopDistributionOptions cyclicNprocsEqNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsEqNumIters,
-        DistributionMethod::CyclicNumProcsEqNumIters};
-    cyclicNprocsEqNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsEqNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute1"),
-            StringAttr::get(context, "after_distribute1")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsGeNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsGeNumIters,
-        DistributionMethod::CyclicNumProcsGeNumIters};
-    cyclicNprocsGeNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsGeNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute2"),
-            StringAttr::get(context, "after_distribute2")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsDefault;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic, DistributionMethod::Cyclic};
-    cyclicNprocsDefault.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsDefault),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute3"),
-            StringAttr::get(context, "after_distribute3")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed1;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsEqNumIters,
-        DistributionMethod::CyclicNumProcsGeNumIters};
-    cyclicNprocsMixed1.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed1),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute4"),
-            StringAttr::get(context, "after_distribute4")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed2;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsGeNumIters,
-        DistributionMethod::Cyclic};
-    cyclicNprocsMixed2.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed2),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute5"),
-            StringAttr::get(context, "after_distribute5")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed3;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic,
-        DistributionMethod::CyclicNumProcsEqNumIters};
-    cyclicNprocsMixed3.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed3),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute6"),
-            StringAttr::get(context, "after_distribute6")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsEqNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic, DistributionMethod::Cyclic};
-    cyclicNprocsEqNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::Loops)
-            .setDistributionOptions(cyclicNprocsEqNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "tensors_distribute1"),
-            StringAttr::get(context, "tensors_after_distribute1")));
-  }
-}
-
 static void applyVectorTransferForwardingPatterns(func::FuncOp funcOp) {
   RewritePatternSet forwardPattern(funcOp.getContext());
   forwardPattern.add<LinalgCopyVTRForwardingPattern>(funcOp.getContext());
@@ -445,33 +183,6 @@
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
-static void applyTilePattern(func::FuncOp funcOp, const std::string &loopType,
-                             ArrayRef<int64_t> tileSizes,
-                             ArrayRef<int64_t> peeledLoops,
-                             bool scalarizeDynamicDims) {
-  MLIRContext *context = funcOp.getContext();
-  RewritePatternSet tilingPattern(context);
-  LinalgTilingLoopType type =
-      llvm::StringSwitch<LinalgTilingLoopType>(loopType)
-          .Case("for", LinalgTilingLoopType::Loops)
-          .Case("affine", LinalgTilingLoopType::AffineLoops)
-          .Case("parallel", LinalgTilingLoopType::ParallelLoops);
-  auto linalgTilingOptions = linalg::LinalgTilingOptions()
-                                 .setPeeledLoops(peeledLoops)
-                                 .setLoopType(type);
-  if (scalarizeDynamicDims) {
-    linalgTilingOptions.scalarizeDynamicDims();
-    assert(tileSizes.empty() &&
-           "tileSizes and scalarizeDynamicDims is mutually exclusive");
-  } else {
-    linalgTilingOptions.setTileSizes(tileSizes);
-  }
-  linalg::LinalgTransformationFilter f(StringAttr::get(context, "tile"));
-  TilingPatterns::insert(
-      tilingPattern, linalgTilingOptions, f);
-  (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
-}
-
 static void applySplitReduction(func::FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
   linalg::populateSplitReductionPattern(
@@ -521,12 +232,6 @@
   };
   std::unique_ptr<void, decltype(lambda)> cleanupGuard{(void *)1, lambda};
-  if (testTileAndDistributionOptions) {
-    RewritePatternSet patterns(&getContext());
-    fillTileAndDistributePatterns(&getContext(), patterns);
-    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-    return;
-  }
   if (testPatterns)
     return applyPatterns(getOperation());
   if (testVectorTransferForwardingPatterns)
@@ -539,12 +244,6 @@
     return applyGeneralizePadTensorPatterns(getOperation());
   if (testSwapSubTensorPadTensor)
     return applyExtractSliceOfPadTensorSwapPattern(getOperation());
-  if (testTilePattern)
-    return applyTilePattern(getOperation(), loopType, tileSizes, peeledLoops,
-                            /*scalarizeDynamicDims=*/false);
-  if (testTileScalarizeDynamicDims)
-    return applyTilePattern(getOperation(), loopType, tileSizes,
-                            /*peeledLoops=*/{}, /*scalarizeDynamicDims=*/true);
   if (testSplitReduction)
     return applySplitReduction(getOperation());
   if (testSplitReductionInnerParallel)
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
@@ -57,10 +57,39 @@
       llvm::cl::desc("perform expensive checks to better report errors in the "
                      "transform IR")};
 };
+
+struct TestTransformDialectEraseSchedulePass
+    : public PassWrapper<TestTransformDialectEraseSchedulePass,
+                         OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+      TestTransformDialectEraseSchedulePass)
+
+  StringRef getArgument() const final {
+    return "test-transform-dialect-erase-schedule";
+  }
+
+  StringRef getDescription() const final {
+    return "erase transform dialect schedule from the IR";
+  }
+
+  void runOnOperation() override {
+    getOperation()->walk<WalkOrder::PreOrder>([&](Operation *nestedOp) {
+      if (isa<::mlir::transform::TransformOpInterface>(nestedOp)) {
+        nestedOp->erase();
+        return WalkResult::skip();
+      }
+      return WalkResult::advance();
+    });
+  }
+};
 } // namespace
 
 namespace mlir {
 namespace test {
+/// Registers the test pass for erasing transform dialect ops.
+void registerTestTransformDialectEraseSchedulePass() {
+  PassRegistration<TestTransformDialectEraseSchedulePass> reg;
+}
 /// Registers the test pass for applying transform dialect ops.
 void registerTestTransformDialectInterpreterPass() {
   PassRegistration<TestTransformDialectInterpreterPass> reg;
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -114,6 +114,7 @@
 void registerTestTensorTransforms();
 void registerTestTilingInterface();
 void registerTestTopologicalSortAnalysisPass();
+void registerTestTransformDialectEraseSchedulePass();
 void registerTestTransformDialectInterpreterPass();
 void registerTestVectorLowerings();
 void registerTestNvgpuLowerings();
@@ -214,6 +215,7 @@
   mlir::test::registerTestTensorTransforms();
   mlir::test::registerTestTilingInterface();
   mlir::test::registerTestTopologicalSortAnalysisPass();
+  mlir::test::registerTestTransformDialectEraseSchedulePass();
   mlir::test::registerTestTransformDialectInterpreterPass();
   mlir::test::registerTestVectorLowerings();
   mlir::test::registerTestNvgpuLowerings();
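
Note on the migration recipe used by the tests above: tile sizes that used to be passed to the removed -linalg-tile pass on the RUN line now live in a transform dialect script embedded in the test file; the script is executed by -test-transform-dialect-interpreter and then stripped by -test-transform-dialect-erase-schedule so that only payload IR reaches the rest of the lowering pipeline. A minimal self-contained sketch in MLIR (the linalg.matmul payload and the 4x4x4 tile sizes are illustrative, not taken from any test in this patch):

// RUN: mlir-opt %s -test-transform-dialect-interpreter \
// RUN:   -test-transform-dialect-erase-schedule -convert-linalg-to-loops

func.func @payload(%A: memref<64x64xf32>, %B: memref<64x64xf32>,
                   %C: memref<64x64xf32>) {
  linalg.matmul ins(%A, %B : memref<64x64xf32>, memref<64x64xf32>)
                outs(%C : memref<64x64xf32>)
  return
}

transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
  // Match every linalg.matmul in the payload module.
  %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
  // Tile by 4x4x4: three non-zero tile sizes produce three loops, hence
  // three loop handles. A zero entry (as in the [0, 5, 5, 5] test above)
  // leaves that dimension untiled and binds one fewer loop handle.
  %tiled, %loops:3 = transform.structured.tile %0 [4, 4, 4]
}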
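For reference, the new erase-schedule test pass walks the module in pre-order, erases every operation implementing TransformOpInterface, and skips the erased op's body, so the payload IR is left untouched for the downstream passes. Roughly, on an illustrative module:

// Input fed to mlir-opt -test-transform-dialect-erase-schedule:
module {
  func.func @payload() { ... }
  transform.sequence failures(propagate) { ... }
}

// Output: only the payload survives for the rest of the pipeline.
module {
  func.func @payload() { ... }
}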