diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md --- a/mlir/docs/Dialects/Linalg/_index.md +++ b/mlir/docs/Dialects/Linalg/_index.md @@ -545,7 +545,6 @@ Additionally, `linalg` provides a small subset of commonly named operations: ``` -* `linalg.copy`, * `linalg.fill`, * `linalg.dot`, * `linalg.matmul`, diff --git a/mlir/include/mlir/Conversion/LinalgToStandard/LinalgToStandard.h b/mlir/include/mlir/Conversion/LinalgToStandard/LinalgToStandard.h --- a/mlir/include/mlir/Conversion/LinalgToStandard/LinalgToStandard.h +++ b/mlir/include/mlir/Conversion/LinalgToStandard/LinalgToStandard.h @@ -39,25 +39,6 @@ PatternRewriter &rewriter) const override; }; -/// Rewrite pattern specialization for CopyOp, kicks in when both input and -/// output permutations are left unspecified or are the identity. -class CopyOpToLibraryCallRewrite : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(CopyOp op, - PatternRewriter &rewriter) const override; -}; - -/// Rewrite CopyOp with permutations into a sequence of TransposeOp and -/// permutation-free CopyOp. This interplays with TransposeOpConversion and -/// LinalgConversion to create a path to the LLVM dialect. -class CopyTransposeRewrite : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(CopyOp op, - PatternRewriter &rewriter) const override; -}; - /// Populate the given list with patterns that convert from Linalg to Standard. 
void populateLinalgToStandardConversionPatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -42,7 +42,7 @@ }: memref<2xf32>, memref<2xf32> br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): - "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + "memref.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () return } } @@ -58,7 +58,7 @@ cond_br %arg0, ^bb1, ^bb2 ^bb1: // pred: ^bb0 %0 = memref.alloc() : memref<2xf32> - linalg.copy(%arg1, %0) : memref<2xf32>, memref<2xf32> + memref.copy %arg1, %0 : memref<2xf32> to memref<2xf32> br ^bb3(%0 : memref<2xf32>) ^bb2: // pred: ^bb0 %1 = memref.alloc() : memref<2xf32> @@ -72,11 +72,11 @@ linalg.yield %4 : f32 }: memref<2xf32>, memref<2xf32> %2 = memref.alloc() : memref<2xf32> - linalg.copy(%1, %2) : memref<2xf32>, memref<2xf32> + memref.copy %1, %2 : memref<2xf32> to memref<2xf32> dealloc %1 : memref<2xf32> br ^bb3(%2 : memref<2xf32>) ^bb3(%3: memref<2xf32>): // 2 preds: ^bb1, ^bb2 - linalg.copy(%3, %arg2) : memref<2xf32>, memref<2xf32> + memref.copy %3, %arg2 : memref<2xf32> to memref<2xf32> dealloc %3 : memref<2xf32> return } diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -16,7 +16,6 @@ include "mlir/Dialect/Linalg/IR/LinalgBase.td" include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td" -include "mlir/Interfaces/CopyOpInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -57,119 +56,6 @@ //===----------------------------------------------------------------------===// // Named 
Linalg ops, implemented as special configurations of generic ops. //===----------------------------------------------------------------------===// -// At the moment these are not declarative and require a bunch of C++ code. -// In the future, these should be migrated to a declarative specification. -def CopyOp : LinalgStructured_Op<"copy", [CopyOpInterface]> { - let description = [{ - Copies the data in the input view into the output view. - - Usage: - - ```mlir - linalg.copy(%arg0, %arg1) : memref, - memref - ``` - - One possible lowering to loop form is: - - ```mlir - %0 = linalg.dim %arg0, 0 : index - scf.for %i0 = %c0 to %0 step %c1 { - %1 = load %arg0[%i0] : memref - store %1, %arg1[%i0] : memref - } - ``` - - Optionally, can take `input_permutation` and `output_permutation` attributes - to reorder the dimensions of the input and output views. - - Usage: - - ```mlir - linalg.copy(%arg0, %arg1) {inputPermutation : (i, j, k) -> (i, k, j), - outputPermutation : (i, j, k) -> (k, j, i)} : - memref, - memref - ``` - - One possible lowering to loop form is: - - ```mlir - %0 = linalg.dim %arg0, 0 - %1 = linalg.dim %arg0, 1 - %2 = linalg.dim %arg0, 2 - scf.for %i0 = %c0 to %{{.*}} step %c1 { - scf.for %i1 = %c0 to %{{.*}} step %c1 { - scf.for %i2 = %c0 to %{{.*}} step %c1 { - %3 = load %arg0[%i0, %i2, %i1] : - memref - store %3, %arg1[%i2, %i1, %i0] : - memref - ``` - - The views are expected to be compatible for correctness but this is not - enforced at the moment. 
- }]; - - let arguments = (ins - AnyStridedMemRef:$input, - AnyStridedMemRef:$output, - OptionalAttr:$inputPermutation, - OptionalAttr:$outputPermutation); - let regions = (region AnyRegion:$region); - - let builders = [ - OpBuilder<(ins "Value":$input, "Value":$output, - CArg<"AffineMap", "AffineMap()">:$inputPermutation, - CArg<"AffineMap", "AffineMap()">:$outputPermutation, - CArg<"ArrayRef", "{}">:$attrs)>]; - - let extraClassDeclaration = structuredOpsDecls # [{ - ValueRange inputs() { return getOperands().take_front(); } - ValueRange outputs() { return getOperands().take_back(); } - - // Rank-polymorphic. - // filling_value -> O(ivs) with parallel iterators. - ArrayAttr iterator_types() { - int64_t nPar = getRank(getInputOperand(0)); - return Builder(getContext()).getStrArrayAttr( - SmallVector(nPar, getParallelIteratorTypeName())); - } - - // I(input_perm(ivs)) -> O(output_perm(ivs)) - ArrayAttr indexing_maps() { - MLIRContext *context = getContext(); - auto maybeInputMap = inputPermutation(); - auto maybeOutputMap = outputPermutation(); - int64_t inputRank = getRank(getInputOperand(0)); - int64_t outputRank = getRank(getOutputOperand(0)); - return Builder(getContext()).getAffineMapArrayAttr({ - extractOrIdentityMap(maybeInputMap, inputRank, context), - extractOrIdentityMap(maybeOutputMap, outputRank, context)}); - } - - Value getSource() { return input();} - Value getTarget() { return output(); } - - static void regionBuilder(ImplicitLocOpBuilder &b, Block &block); - static std::function - getRegionBuilder() { - return &regionBuilder; - } - static unsigned getNumRegionArgs() { return 2; } - }]; - let verifier = [{ return ::verify(*this); }]; - - let assemblyFormat = [{ - `(` $input `,` $output `)` attr-dict `:` - type($input) `,` type($output) - custom($region, ref(type($input)), ref(type($input))) - }]; - - let hasCanonicalizer = 1; - let hasFolder = 1; - let skipDefaultBuilders = 1; -} def FillOp : LinalgStructured_Op<"fill", []> { let arguments = (ins diff 
--git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -349,7 +349,7 @@ return *this; } /// Callback function to do the copy of data to and from the promoted - /// subview. If None then a linalg.copy is used. + /// subview. If None then a memref.copy is used. Optional copyInFn = None; Optional copyOutFn = None; LinalgPromotionOptions &setCopyInOutFns(CopyCallbackFn const &copyIn, @@ -390,6 +390,9 @@ /// Emit a suitable vector form for a Linalg op with fully static shape. LogicalResult vectorize(RewriterBase &builder, LinalgOp linalgOp); +/// Emit a suitable vector form for a Copy op with fully static shape. +LogicalResult vectorizeCopy(RewriterBase &builder, memref::CopyOp copyOp); + /// Emit a loop nest of `scf.for` with the proper body for `linalgOp`. FailureOr linalgOpToLoops(PatternRewriter &rewriter, LinalgOp linalgOp); @@ -934,6 +937,15 @@ LinalgTransformationFilter filter; }; +/// Rewrite pattern that vectorizes a `memref.copy` op with fully static shape. +/// See `vectorizeCopy` for more details. 
+struct CopyVectorizationPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(memref::CopyOp copyOp, + PatternRewriter &rewriter) const override; +}; + /// Return vector::CombiningKind for the given op. llvm::Optional getCombinerOpKind(Operation *combinerOp); @@ -1206,7 +1218,7 @@ /// %subView = subview %allocOrView ... /// [optional] linalg.fill(%allocOrView, %cst) ... /// ... -/// linalg.copy(%in, %subView) ... +/// memref.copy(%in, %subView) ... /// vector.transfer_read %allocOrView[...], %cst ... /// ``` /// into @@ -1217,8 +1229,8 @@ /// ... /// vector.transfer_read %in[...], %cst ... /// ``` -/// Where there is no interleaved use between linalg.copy and transfer_read as -/// well as no interleaved use between linalg.fill and linalg.copy (if +/// Where there is no interleaved use between memref.copy and transfer_read as +/// well as no interleaved use between linalg.fill and memref.copy (if /// linalg.fill is specified). /// This is a custom rewrite to forward partial reads (with optional fills) to /// vector.transfer_read. @@ -1237,7 +1249,7 @@ /// %subView = subview %allocOrView... /// ... /// vector.transfer_write %..., %allocOrView[...] -/// linalg.copy(%subView, %out) +/// memref.copy(%subView, %out) /// ``` /// into /// ``` @@ -1247,7 +1259,7 @@ /// ... /// vector.transfer_write %..., %out[...] /// ``` -/// Where there is no interleaved use between transfer_write and linalg.copy. +/// Where there is no interleaved use between transfer_write and memref.copy. /// This is a custom rewrite to forward partial writes to vector.transfer_write. 
struct LinalgCopyVTWForwardingPattern : public OpRewritePattern { diff --git a/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h --- a/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h +++ b/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h @@ -57,7 +57,7 @@ None = 0, /// Split using in-bounds + out-of-bounds vector.transfer operations. VectorTransfer = 1, - /// Split using an in-bounds vector.transfer + linalg.fill + linalg.copy + /// Split using an in-bounds vector.transfer + linalg.fill + memref.copy /// operations. LinalgCopy = 2, /// Do not split vector transfer operation but instead mark it as "in-bounds". diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -47,7 +47,7 @@ /// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { -/// // slowpath, not in-bounds vector.transfer or linalg.copy. +/// // slowpath, not in-bounds vector.transfer or memref.copy. /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -96,10 +96,6 @@ LogicalResult mlir::linalg::LinalgOpToLibraryCallRewrite::matchAndRewrite( LinalgOp op, PatternRewriter &rewriter) const { - // Only LinalgOp for which there is no specialized pattern go through this. 
- if (isa(op)) - return failure(); - auto libraryCallName = getLibraryCallSymbolRef(op, rewriter); if (!libraryCallName) return failure(); @@ -113,65 +109,12 @@ return success(); } -LogicalResult mlir::linalg::CopyOpToLibraryCallRewrite::matchAndRewrite( - CopyOp op, PatternRewriter &rewriter) const { - auto inputPerm = op.inputPermutation(); - if (inputPerm.hasValue() && !inputPerm->isIdentity()) - return failure(); - auto outputPerm = op.outputPermutation(); - if (outputPerm.hasValue() && !outputPerm->isIdentity()) - return failure(); - - auto libraryCallName = getLibraryCallSymbolRef(op, rewriter); - if (!libraryCallName) - return failure(); - - rewriter.replaceOpWithNewOp( - op, libraryCallName.getValue(), TypeRange(), - createTypeCanonicalizedMemRefOperands(rewriter, op.getLoc(), - op.getOperands())); - return success(); -} - -LogicalResult mlir::linalg::CopyTransposeRewrite::matchAndRewrite( - CopyOp op, PatternRewriter &rewriter) const { - Value in = op.input(), out = op.output(); - - // If either inputPerm or outputPerm are non-identities, insert transposes. - auto inputPerm = op.inputPermutation(); - if (inputPerm.hasValue() && !inputPerm->isIdentity()) - in = rewriter.create(op.getLoc(), in, - AffineMapAttr::get(*inputPerm)); - auto outputPerm = op.outputPermutation(); - if (outputPerm.hasValue() && !outputPerm->isIdentity()) - out = rewriter.create(op.getLoc(), out, - AffineMapAttr::get(*outputPerm)); - - // If nothing was transposed, fail and let the conversion kick in. - if (in == op.input() && out == op.output()) - return failure(); - - auto libraryCallName = getLibraryCallSymbolRef(op, rewriter); - if (!libraryCallName) - return failure(); - - rewriter.replaceOpWithNewOp( - op, libraryCallName.getValue(), TypeRange(), - createTypeCanonicalizedMemRefOperands(rewriter, op.getLoc(), {in, out})); - return success(); -} - /// Populate the given list with patterns that convert from Linalg to Standard. 
void mlir::linalg::populateLinalgToStandardConversionPatterns( RewritePatternSet &patterns) { // TODO: ConvOp conversion needs to export a descriptor with relevant // attribute values such as kernel striding and dilation. - // clang-format off - patterns.add< - CopyOpToLibraryCallRewrite, - CopyTransposeRewrite, - LinalgOpToLibraryCallRewrite>(patterns.getContext()); - // clang-format on + patterns.add(patterns.getContext()); } namespace { diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -372,110 +372,6 @@ } // namespace -//===----------------------------------------------------------------------===// -// CopyOp -//===----------------------------------------------------------------------===// -void CopyOp::regionBuilder(ImplicitLocOpBuilder &b, Block &block) { - assert(block.getNumArguments() == 2 && "CopyOp regionBuilder expects 2 args"); - b.create(block.getArgument(0)); -} - -void CopyOp::build(OpBuilder &builder, OperationState &result, Value input, - Value output, AffineMap inputPermutation, - AffineMap outputPermutation, - ArrayRef namedAttrs) { - result.addOperands({input, output}); - result.addAttributes(namedAttrs); - if (inputPermutation) - result.addAttribute("inputPermutation", - AffineMapAttr::get(inputPermutation)); - if (outputPermutation) - result.addAttribute("outputPermutation", - AffineMapAttr::get(outputPermutation)); - result.addRegion(); - fillStructuredOpRegion(builder, *result.regions.front(), - TypeRange{input.getType()}, - TypeRange{output.getType()}); -} - -ParseResult parseCopyOpRegion(OpAsmParser &parser, Region &r, Type inputType, - Type outputType) { - OpBuilder opBuilder(parser.getContext()); - fillStructuredOpRegion(opBuilder, r, TypeRange{inputType}, - TypeRange{outputType}); - return success(); -} - -/// CopyOp region is elided when printing. 
-void printCopyOpRegion(OpAsmPrinter &, Operation *, Region &, Type, Type) {} - -static LogicalResult verify(CopyOp op) { - OpOperand *output = op.getOutputOperand(0); - OpOperand *input = op.getInputOperand(0); - if (getElementTypeOrSelf(input->get()) != getElementTypeOrSelf(output->get())) - return op.emitOpError("expects views of the same type"); - if (op.getRank(input) != op.getRank(output)) - return op.emitOpError("expects views of the same rank"); - auto rank = op.getNumParallelLoops(); - auto inputPermutationMap = op.inputPermutation(); - if (inputPermutationMap) { - if (inputPermutationMap->getNumInputs() != rank) - return op.emitOpError("expects optional input_permutation map of rank ") - << rank; - if (!inputPermutationMap->isPermutation()) - return op.emitOpError( - "expects optional input_permutation map to be a permutation"); - } - auto outputPermutationMap = op.outputPermutation(); - if (outputPermutationMap) { - if (outputPermutationMap->getNumInputs() != rank) - return op.emitOpError("expects optional output_permutation map of rank ") - << rank; - if (!outputPermutationMap->isPermutation()) - return op.emitOpError( - "expects optional output_permutation map to be a permutation"); - } - if (rank == 0 && inputPermutationMap) - return op.emitOpError("expected no input permutation when rank == 0"); - if (rank == 0 && outputPermutationMap) - return op.emitOpError("expected no output permutation when rank == 0"); - return success(); -} - -void CopyOp::getEffects( - SmallVectorImpl> - &effects) { - effects.emplace_back(MemoryEffects::Read::get(), input(), - SideEffects::DefaultResource::get()); - effects.emplace_back(MemoryEffects::Write::get(), output(), - SideEffects::DefaultResource::get()); -} - -namespace { -/// Remove copy operations that copy data inplace. Requirements are: -/// 1) The input and output values are identical. -/// 2) The input and output permutation maps are identical. 
-struct EraseIdentityCopyOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(CopyOp copyOp, - PatternRewriter &rewriter) const override { - assert(copyOp.hasBufferSemantics()); - if (copyOp.input() == copyOp.output() && - copyOp.inputPermutation() == copyOp.outputPermutation()) { - rewriter.eraseOp(copyOp); - return success(); - } - return failure(); - } -}; -} // namespace - -void CopyOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); -} - //===----------------------------------------------------------------------===// // FillOp //===----------------------------------------------------------------------===// @@ -2165,7 +2061,6 @@ return foldMemRefCast(*this); \ } -LINALGOP_FOLDERS(CopyOp) LINALGOP_FOLDERS(FillOp) LINALGOP_FOLDERS(GenericOp) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -31,7 +31,7 @@ auto memrefType = memref.getType().cast(); auto alloc = b.create(loc, memrefType, getDynOperands(loc, memref, b)); - b.create(loc, memref, alloc); + b.create(loc, memref, alloc); return alloc; } @@ -197,10 +197,10 @@ /// ``` /// %a = alloc(sizes) /// %sv = subview %source [offsets][sizes][strides] -/// linalg_copy(%sv, %a) +/// memref.copy(%sv, %a) /// ``` /// -/// This pattern is arguable a std pattern once linalg::CopyOp becomes +/// This pattern is arguable a std pattern once memref::CopyOp becomes /// std::CopyOp. 
class ExtractSliceOpConverter : public OpConversionPattern { @@ -223,7 +223,7 @@ Value subView = rewriter.create( op.getLoc(), sourceMemref, op.getMixedOffsets(), op.getMixedSizes(), op.getMixedStrides()); - rewriter.create(op.getLoc(), subView, alloc); + rewriter.create(op.getLoc(), subView, alloc); rewriter.replaceOp(op, alloc); return success(); } @@ -235,11 +235,11 @@ /// conversion infra: /// ``` /// %sv = subview %dest [offsets][sizes][strides] -/// linalg_copy(%source, %sv) +/// memref.copy(%source, %sv) /// // replace with %dest /// ``` /// -/// This pattern is arguable a std pattern once linalg::CopyOp becomes +/// This pattern is arguable a std pattern once memref::CopyOp becomes /// std::CopyOp. class InsertSliceOpConverter : public OpConversionPattern { @@ -263,7 +263,7 @@ op.getLoc(), destMemRef, op.getMixedOffsets(), op.getMixedSizes(), op.getMixedStrides()); // Copy the small memref. - rewriter.create(op.getLoc(), sourceMemRef, subview); + rewriter.create(op.getLoc(), sourceMemRef, subview); rewriter.replaceOp(op, destMemRef); return success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -80,7 +80,7 @@ /// Create a linalg::GenericOp version of an n-D copy that can further tile, /// lower to loops or vectorize, unlike the current implementation of /// memref::CopyOp. -/// Do not depend on linalg::CopyOp that is getting deprecated. +/// Do not depend on linalg::CopyOp, which has been removed. 
static LogicalResult createLinalgCopyOp(OpBuilder &b, Location loc, Value from, Value to) { auto memrefTypeFrom = from.getType().cast(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -637,7 +637,7 @@ /// Fused dimensions : i, j /// /// Example 3: -/// linalg.copy(%s, %b) +/// memref.copy(%s, %b) /// linalg.matmul ins(%a, %b) outs(%c) /// /// Number of parallel loops = 2 diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -186,7 +186,7 @@ Location loc = linalgOp.getLoc(); auto defaultCopyCallBack = [loc](OpBuilder &b, Value src, Value dst) -> LogicalResult { - b.create(loc, src, dst); + b.create(loc, src, dst); return success(); }; copyInFn = (options.copyInFn ? 
*(options.copyInFn) : defaultCopyCallBack); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -722,6 +722,11 @@ return vectorize(rewriter, linalgOp); } +LogicalResult mlir::linalg::CopyVectorizationPattern::matchAndRewrite( + memref::CopyOp copyOp, PatternRewriter &rewriter) const { + return vectorizeCopy(rewriter, copyOp); +} + LogicalResult mlir::linalg::applyStagedPatterns( Operation *op, ArrayRef stage1Patterns, const FrozenRewritePatternSet &stage2Patterns, diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -656,6 +656,38 @@ return success(); } +LogicalResult mlir::linalg::vectorizeCopy(RewriterBase &rewriter, + memref::CopyOp copyOp) { + + auto srcType = copyOp.source().getType().cast(); + auto dstType = copyOp.target().getType().cast(); + if (!srcType.hasStaticShape() || !dstType.hasStaticShape()) + return failure(); + + auto readType = + VectorType::get(srcType.getShape(), getElementTypeOrSelf(srcType)); + auto writeType = + VectorType::get(dstType.getShape(), getElementTypeOrSelf(dstType)); + + Location loc = copyOp->getLoc(); + Value zero = rewriter.create(loc, 0); + SmallVector indices(srcType.getRank(), zero); + + Value readValue = rewriter.create( + loc, readType, copyOp.source(), indices, + rewriter.getMultiDimIdentityMap(srcType.getRank())); + if (readValue.getType().cast().getRank() == 0) { + readValue = rewriter.create(loc, readValue); + readValue = rewriter.create(loc, writeType, readValue); + } + Operation *writeValue = rewriter.create( + loc, readValue, copyOp.target(), indices, + rewriter.getMultiDimIdentityMap(srcType.getRank())); + // Replace the copy with the vectorized transfer write. + 
rewriter.replaceOp(copyOp, writeValue->getResults()); + return success(); +} + //----------------------------------------------------------------------------// // Misc. vectorization patterns. //----------------------------------------------------------------------------// @@ -1168,11 +1200,11 @@ LDBG("with subView " << subView); // Find the copy into `subView` without interleaved uses. - CopyOp copyOp; + memref::CopyOp copyOp; for (auto &u : subView.getUses()) { - if (auto newCopyOp = dyn_cast(u.getOwner())) { - assert(newCopyOp.output().getType().isa()); - if (newCopyOp.output() != subView) + if (auto newCopyOp = dyn_cast(u.getOwner())) { + assert(newCopyOp.target().getType().isa()); + if (newCopyOp.target() != subView) continue; LDBG("copy candidate " << *newCopyOp); if (mayExistInterleavedUses(newCopyOp, xferOp, {viewOrAlloc, subView})) @@ -1206,10 +1238,10 @@ if (maybeFillOp) LDBG("with maybeFillOp " << *maybeFillOp); - // `in` is the subview that linalg.copy reads. Replace it. - Value in = copyOp.input(); + // `in` is the subview that memref.copy reads. Replace it. + Value in = copyOp.source(); - // linalg.copy + linalg.fill can be used to create a padded local buffer. + // memref.copy + linalg.fill can be used to create a padded local buffer. // The `masked` attribute is only valid on this padded buffer. // When forwarding to vector.transfer_read, the attribute must be reset // conservatively. @@ -1248,10 +1280,10 @@ Value subView = subViewOp.getResult(); // Find the copy from `subView` without interleaved uses. 
- CopyOp copyOp; + memref::CopyOp copyOp; for (auto &u : subViewOp.getResult().getUses()) { - if (auto newCopyOp = dyn_cast(u.getOwner())) { - if (newCopyOp.getInputOperand(0)->get() != subView) + if (auto newCopyOp = dyn_cast(u.getOwner())) { + if (newCopyOp.source() != subView) continue; if (mayExistInterleavedUses(xferOp, newCopyOp, {viewOrAlloc, subView})) continue; @@ -1263,11 +1295,11 @@ return failure(); // `out` is the subview copied into that we replace. - assert(copyOp.output().getType().isa()); - Value out = copyOp.output(); + assert(copyOp.target().getType().isa()); + Value out = copyOp.target(); // Forward vector.transfer into copy. - // linalg.copy + linalg.fill can be used to create a padded local buffer. + // memref.copy + linalg.fill can be used to create a padded local buffer. // The `masked` attribute is only valid on this padded buffer. // When forwarding to vector.transfer_write, the attribute must be reset // conservatively. diff --git a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp --- a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp @@ -111,7 +111,7 @@ /// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { -/// // slowpath, not in-bounds vector.transfer or linalg.copy. +/// // slowpath, not in-bounds vector.transfer or memref.copy. /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } @@ -238,7 +238,7 @@ /// %2 = linalg.fill(%pad, %alloc) /// %3 = subview %view [...][...][...] /// %4 = subview %alloc [0, 0] [...] [...] -/// linalg.copy(%3, %4) +/// memref.copy(%3, %4) /// %5 = memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %5, ... 
: compatibleMemRefType, index, index /// } @@ -270,7 +270,7 @@ std::pair copyArgs = createSubViewIntersection( rewriter, cast(xferOp.getOperation()), alloc); - b.create(loc, copyArgs.first, copyArgs.second); + b.create(loc, copyArgs.first, copyArgs.second); Value casted = b.create(loc, alloc, compatibleMemRefType); scf::ValueVector viewAndIndices{casted}; @@ -389,7 +389,7 @@ /// scf.if (%notInBounds) { /// %3 = subview %alloc [...][...][...] /// %4 = subview %view [0, 0][...][...] -/// linalg.copy(%3, %4) +/// memref.copy(%3, %4) /// } /// ``` static void createFullPartialLinalgCopy(RewriterBase &b, @@ -403,7 +403,7 @@ std::pair copyArgs = createSubViewIntersection( rewriter, cast(xferOp.getOperation()), alloc); - b.create(loc, copyArgs.first, copyArgs.second); + b.create(loc, copyArgs.first, copyArgs.second); b.create(loc, ValueRange{}); }); } @@ -459,7 +459,7 @@ /// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { -/// // slowpath, not in-bounds vector.transfer or linalg.copy. +/// // slowpath, not in-bounds vector.transfer or memref.copy. /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } @@ -488,7 +488,7 @@ /// %0 = vector.transfer_write %arg, %1#0[%1#1, %1#2] {in_bounds = [true ... /// true]} /// scf.if (%notInBounds) { -/// // slowpath: not in-bounds vector.transfer or linalg.copy. +/// // slowpath: not in-bounds vector.transfer or memref.copy. /// } /// ``` /// where `alloc` is a top of the function alloca'ed buffer of one vector. 
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -143,7 +143,7 @@ // CHECK-DAG: %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>> // CHECK-DAG: %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : memref<3x2xf32> // CHECK: %[[INIT_BUFFER:.*]] = memref.alloc() : memref<3x2xf32> -// CHECK: linalg.copy(%[[ARG1_MEMREF]], %[[INIT_BUFFER]]) : memref<3x2xf32>, memref<3x2xf32> +// CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>) // CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) { @@ -178,14 +178,14 @@ // CHECK-NEXT: %[[A0:.*]] = memref.alloc() : memref<2x3xf32> // CHECK-NEXT: %[[SM0:.*]] = memref.subview %[[M]][0, 0] [2, 3] [1, 1] // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> - // CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32> + // CHECK-NEXT: memref.copy %[[SM0]], %[[A0]] : memref<2x3xf32, #[[$MAP0]]> to memref<2x3xf32> // CHECK-NEXT: %[[RT0:.*]] = bufferization.to_tensor %[[A0]] : memref<2x3xf32> %st0 = tensor.extract_slice %t[0, 0][2, 3][1, 1] : tensor to tensor<2x3xf32> // CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32> // CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> - // CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32> + // CHECK-NEXT: memref.copy %[[SM1]], %[[A1]] : memref<2x?xf32, #[[$MAP1]]> to memref<2x?xf32> // CHECK-NEXT: %[[RT1:.*]] = bufferization.to_tensor %[[A1]] : memref<2x?xf32> %st1 = tensor.extract_slice %t[0, %i0][2, %i0][1, 2] : tensor to tensor<2x?xf32> @@ -221,18 +221,18 @@ // CHECK-NEXT: %[[DIM0:.*]] = tensor.dim %[[T]], 
%[[C0]] : tensor // CHECK-NEXT: %[[DIM1:.*]] = tensor.dim %[[T]], %[[C1]] : tensor // CHECK-NEXT: %[[M_COPY0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref - // CHECK-NEXT: linalg.copy(%[[M]], %[[M_COPY0]]) : memref, memref + // CHECK-NEXT: memref.copy %[[M]], %[[M_COPY0]] : memref to memref // CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[M_COPY0]][0, 0] [2, 3] [1, 1] // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> - // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]> + // CHECK-NEXT: memref.copy %[[SM0]], %[[SUBVIEW0]] : memref<2x3xf32> to memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: %[[RT0:.*]] = bufferization.to_tensor %[[M_COPY0]] : memref %t0 = tensor.insert_slice %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor // CHECK-NEXT: %[[M_COPY1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref - // CHECK-NEXT: linalg.copy(%[[M]], %[[M_COPY1]]) : memref, memref + // CHECK-NEXT: memref.copy %[[M]], %[[M_COPY1]] : memref to memref // CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[M_COPY1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> - // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]> + // CHECK-NEXT: memref.copy %[[SM1]], %[[SUBVIEW1]] : memref<2x?xf32> to memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: %[[RT1:.*]] = bufferization.to_tensor %[[M_COPY1]] : memref %t1 = tensor.insert_slice %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor @@ -296,9 +296,9 @@ // CHECK: %[[FILLED:.*]] = memref.alloc(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : memref<4x?x?x?xf32> // CHECK: linalg.fill(%[[CST]], %[[FILLED]]) : f32, memref<4x?x?x?xf32> // CHECK: %[[OUT:.*]] = memref.alloc(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : memref<4x?x?x?xf32> -// CHECK: linalg.copy(%[[FILLED]], %[[OUT]]) : memref<4x?x?x?xf32>, memref<4x?x?x?xf32> +// CHECK: memref.copy %[[FILLED]], %[[OUT]] : memref<4x?x?x?xf32> to 
memref<4x?x?x?xf32> // CHECK: %[[INTERIOR:.*]] = memref.subview %[[OUT]][0, 0, %[[OFFSET]], 0] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : memref<4x?x?x?xf32> to memref<4x?x2x?xf32, #map> -// CHECK: linalg.copy(%[[IN_MEMREF]], %[[INTERIOR]]) : memref<4x?x2x?xf32>, memref<4x?x2x?xf32, #map> +// CHECK: memref.copy %[[IN_MEMREF]], %[[INTERIOR]] : memref<4x?x2x?xf32> to memref<4x?x2x?xf32, #map> // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[OUT]] : memref<4x?x?x?xf32> // CHECK: return %[[OUT_TENSOR]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -54,7 +54,7 @@ func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> { // memref<0x32> is expected to be dce'ed - linalg.copy(%arg0, %arg0): memref<0xf32>, memref<0xf32> + memref.copy %arg0, %arg0 : memref<0xf32> to memref<0xf32> // tensor<0xf32> cannot be dce'ed %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) { @@ -67,7 +67,7 @@ // CHECK-LABEL: @dce_zero_memref // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<0xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<0xf32> -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK-NEXT: return %[[ARG1]] // ----- @@ -330,22 +330,8 @@ // CHECK-LABEL: @self_copy func @self_copy(%arg0 : memref<2x3x?x4xf32>) { -// CHECK-NOT: linalg.copy - linalg.copy(%arg0, %arg0): memref<2x3x?x4xf32>, memref<2x3x?x4xf32> - -// CHECK: return - return -} - -// ----- - -// CHECK-LABEL: @self_copy_with_permutation -func @self_copy_with_permutation(%arg0 : memref<2x3x?x4xf32>) { - -// CHECK: linalg.copy - linalg.copy(%arg0, %arg0) - {inputPermutation = affine_map<(i, j, k, l) -> (j, k, i, l)>, - outputPermuation = affine_map<(i, j, k, l) -> (i, j, k, l)>} : memref<2x3x?x4xf32>, memref<2x3x?x4xf32> +// CHECK-NOT: memref.copy + memref.copy %arg0, %arg0 : 
memref<2x3x?x4xf32> to memref<2x3x?x4xf32> // CHECK: return return diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -3,7 +3,7 @@ // CHECK-LABEL: testAllocRead // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: in_bounds @@ -12,7 +12,7 @@ %f0 = arith.constant 0.0: f32 %alloc = memref.alloc() : memref<32 x f32> %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.copy(%in, %subview): memref, memref<16 x f32> + memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> @@ -21,7 +21,7 @@ // CHECK-LABEL: testAllocFillRead // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: in_bounds @@ -31,7 +31,7 @@ %alloc = memref.alloc() : memref<32 x f32> linalg.fill(%f0, %alloc) : f32, memref<32 x f32> %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.copy(%in, %subview): memref, memref<16 x f32> + memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> @@ -40,7 +40,7 @@ // CHECK-LABEL: testViewRead // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = 
memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: in_bounds @@ -50,7 +50,7 @@ %alloc = memref.alloc() : memref<128 x i8> %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.copy(%in, %subview): memref, memref<16 x f32> + memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> return %0: vector<32 x f32> @@ -59,7 +59,7 @@ // CHECK-LABEL: testViewFillRead // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: in_bounds @@ -70,7 +70,7 @@ %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.fill(%f0, %view) : f32, memref<32 x f32> - linalg.copy(%in, %subview): memref, memref<16 x f32> + memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> return %0: vector<32 x f32> @@ -79,7 +79,7 @@ // CHECK-LABEL: testAllocWrite // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: vector // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ARG1]] // CHECK-NOT: in_bounds @@ -89,7 +89,7 @@ %alloc = memref.alloc() : memref<32 x f32> %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %alloc[%c0] {in_bounds = [true]} : vector<32 x f32>, memref<32 x f32> - linalg.copy(%subview, %out): memref<16 x f32>, memref + memref.copy %subview, %out : 
memref<16 x f32> to memref memref.dealloc %alloc : memref<32 x f32> return } @@ -97,7 +97,7 @@ // CHECK-LABEL: testViewWrite // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: vector // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// CHECK-NOT: linalg.copy +// CHECK-NOT: memref.copy // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ARG1]] // CHECK-NOT: in_bounds @@ -108,7 +108,7 @@ %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %view[%c0] {in_bounds = [true]} : vector<32 x f32>, memref<32 x f32> - linalg.copy(%subview, %out): memref<16 x f32>, memref + memref.copy %subview, %out : memref<16 x f32> to memref memref.dealloc %alloc : memref<128 x i8> return } @@ -122,7 +122,7 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: vector.transfer_read %[[ARG0]] // CHECK: %[[ALLOC:.*]] = memref.alloc -// CHECK: linalg.copy +// CHECK: memref.copy // CHECK: vector.transfer_read %[[ALLOC]] func @failAllocFillRead(%in: memref) -> vector<32 x f32> { %c0 = arith.constant 0: index @@ -131,7 +131,7 @@ %alloc = memref.alloc() : memref<32 x f32> linalg.fill(%f0, %alloc) : f32, memref<32 x f32> %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.copy(%in, %subview): memref, memref<16 x f32> + memref.copy %in, %subview : memref to memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () %0 = vector.transfer_read %alloc[%c0], %f1: memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<32 x f32> @@ -145,7 +145,7 @@ // CHECK-NOT: vector.transfer_write %[[ARG0]], %[[ARG1]] // CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ALLOC]] -// CHECK: linalg.copy +// CHECK: memref.copy func @failAllocWrite(%vec: vector<32 x f32>, %out: memref) { %c0 = arith.constant 0: index %f0 = arith.constant 0.0: f32 @@ -153,7 +153,7 @@ 
%subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %alloc[%c0] : vector<32 x f32>, memref<32 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () - linalg.copy(%subview, %out): memref<16 x f32>, memref + memref.copy %subview, %out : memref<16 x f32> to memref memref.dealloc %alloc : memref<32 x f32> return } diff --git a/mlir/test/Dialect/Linalg/fusion-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-pattern.mlir --- a/mlir/test/Dialect/Linalg/fusion-pattern.mlir +++ b/mlir/test/Dialect/Linalg/fusion-pattern.mlir @@ -71,165 +71,6 @@ // ----- -module { - func @rhs_fusion(%arg0: memref, %arg1: memref, - %arg2: memref, %arg3: memref) { - %cst = arith.constant 0.000000e+00 : f32 - linalg.copy(%arg1, %arg2) : memref, memref - linalg.fill(%cst, %arg3) : f32, memref - linalg.matmul {__internal_linalg_transform__ = "rhs_fusion"} - ins(%arg0, %arg2 : memref, memref) - outs(%arg3 : memref) - return - } -} -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 64, -d0 + s1)> -// CHECK: func @rhs_fusion -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index -// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index -// CHECK-DAG: %[[CST:.+]] = arith.constant 0.0{{.*}} : f32 -// CHECK-DAG: linalg.copy(%[[ARG1]], %[[ARG2]]) -// CHECK-SAME: __internal_linalg_transform__ = 
"after_rhs_fusion_original" -// CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG2]], %[[C1]] -// CHECK: scf.parallel (%[[IV0:.+]]) = -// CHECK-SAME: (%[[C0]]) to (%[[N]]) step (%[[C64]]) { -// CHECK: %[[K:.+]] = memref.dim %[[ARG2]], %[[C0]] -// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N]]] -// CHECK: %[[SV1:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]] -// CHECK-SAME: [%[[K]], %[[TILE_N]]] -// CHECK: %[[M:.+]] = memref.dim %[[ARG3]], %[[C0]] -// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][0, %[[IV0]]] -// CHECK-SAME: [%[[M]], %[[TILE_N]] -// CHECK: %[[N_3:.+]] = memref.dim %[[ARG1]], %[[C1]] -// CHECK: %[[K_2:.+]] = memref.dim %[[ARG1]], %[[C0]] -// CHECK: %[[TILE_N_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[N_3]], %[[N]]] -// CHECK: %[[SV3:.+]] = memref.subview %[[ARG1]][0, %[[IV0]]] -// CHECK-SAME: [%[[K_2]], %[[TILE_N_3]]] -// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]] -// CHECK-SAME: [%[[K]], %[[TILE_N_3]]] -// CHECK: linalg.copy(%[[SV3]], %[[SV3_2]]) -// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_producer" -// CHECK-NOT: linalg.fill -// CHECK-DAG: %[[M_2:.+]] = memref.dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[K_2:.+]] = memref.dim %[[ARG0]], %[[C1]] -// CHECK: scf.parallel (%[[IV1:.+]]) = -// CHECK-SAME: (%[[C0]]) to (%[[M_2]]) step (%[[C32]]) { -// CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K_2]] step %[[C16]] { -// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M_2]]] -// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]] -// CHECK: %[[SV4:.+]] = memref.subview %[[ARG0]][%[[IV1]], %[[IV2]]] -// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] -// CHECK: %[[SV5:.+]] = memref.subview %[[SV1]][%[[IV2]], 0] -// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]] -// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][%[[IV1]], 0] -// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]] -// CHECK: linalg.matmul -// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion" -// CHECK-SAME: ins(%[[SV4]], %[[SV5]] -// CHECK-SAME: 
: memref, memref) -// CHECK-SAME: outs(%[[SV6]] : memref) -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: linalg.matmul -// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_original" - - -// ----- - -module { - func @two_operand_fusion(%arg0: memref, %arg1: memref, - %arg2: memref, %arg3: memref) { - %cst = arith.constant 0.000000e+00 : f32 - linalg.copy(%arg0, %arg1) : memref, memref - linalg.fill(%cst, %arg3) : f32, memref - linalg.matmul {__internal_linalg_transform__ = "two_operand_fusion"} - ins(%arg1, %arg2 : memref, memref) - outs(%arg3 : memref) - return - } -} -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 32, -d0 + s1)> -// CHECK: func @two_operand_fusion -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index -// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index -// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index -// CHECK-DAG: %[[CST:.+]] = arith.constant 0.0{{.*}} : f32 -// CHECK: linalg.copy(%[[ARG0]], %[[ARG1]]) -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" -// CHECK: linalg.fill(%[[CST]], %[[ARG3]]) -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" -// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG1]], %[[C0]] -// CHECK: scf.parallel (%[[IV0:.+]]) = -// CHECK-SAME: (%[[C0]]) to (%[[M]]) step (%[[C32]]) { -// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] 
-// CHECK: %[[K:.+]] = memref.dim %[[ARG1]], %[[C1]] -// CHECK: %[[SV1:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0] -// CHECK-SAME: [%[[TILE_M]], %[[K]]] -// CHECK: %[[N:.+]] = memref.dim %[[ARG3]], %[[C1]] -// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0] -// CHECK-SAME: [%[[TILE_M]], %[[N]]] -// CHECK: %[[M_2:.+]] = memref.dim %[[ARG3]], %[[C0]] -// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M_2]], %[[M]]] -// CHECK: %[[SV2_2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0] -// CHECK-SAME: [%[[TILE_M_3]], %[[N]]] -// CHECK: %[[M_3:.+]] = memref.dim %[[ARG0]], %[[C0]] -// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M_3]], %[[M]]] -// CHECK: %[[K_3:.+]] = memref.dim %[[ARG0]], %[[C1]] -// CHECK: %[[SV3:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] -// CHECK-SAME: [%[[TILE_M_4]], %[[K_3]]] -// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0] -// CHECK-SAME: [%[[TILE_M_4]], %[[K]]] -// CHECK: linalg.copy(%[[SV3]], %[[SV3_2]]) -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" -// CHECK: linalg.fill(%[[CST]], %[[SV2_2]]) -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" -// CHECK-DAG: %[[N_2:.+]] = memref.dim %[[ARG2]], %[[C1]] -// CHECK: scf.parallel (%[[IV1:.+]]) = -// CHECK-SAME: (%[[C0]]) to (%[[N_2]]) step (%[[C64]]) { -// CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { -// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]] -// CHECK: %[[SV4:.+]] = memref.subview %[[SV1]][0, %[[IV2]]] -// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] -// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]] -// CHECK: %[[SV5:.+]] = memref.subview %[[ARG2]][%[[IV2]], %[[IV1]]] -// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]] -// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][0, %[[IV1]]] -// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]] -// CHECK: linalg.matmul -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion" -// 
CHECK-SAME: ins(%[[SV4]], %[[SV5]] -// CHECK-SAME: : memref, memref) -// CHECK-SAME: outs(%[[SV6]] : memref) -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: linalg.matmul -// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" - -// ----- - module { func @matmul_fusion(%arg0: memref, %arg1: memref, %arg2: memref, %arg3: memref, diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -206,7 +206,14 @@ // CHECKPARALLEL: store %{{.*}}, {{.*}} : memref func @copy_view(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, memref + linalg.generic { + iterator_types = ["parallel"], + indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] } + ins(%arg0: memref) + outs(%arg1: memref) { + ^bb0(%a: f32, %b: f32): + linalg.yield %a : f32 + } return } // CHECK-LABEL: func @copy_view( @@ -221,38 +228,6 @@ // CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref -func @copy_view0(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, memref - return -} -// CHECK-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: memref.load %{{.*}}[] : memref -// CHECK: store %{{.*}}, %{{.*}}[] : memref - -// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: memref.load %{{.*}}[] : memref -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref - -func @copy_view3(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, - outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} : - memref, memref - return -} -// CHECK-LABEL: func @copy_view3 -// CHECK: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECK: scf.for {{.*}} to %{{.*}} -// CHECK: scf.for {{.*}} to %{{.*}} -// CHECK: scf.for {{.*}} to %{{.*}} -// CHECK: %[[L:.*]] = 
memref.load {{.*}} : memref -// CHECK: store %[[L]], {{.*}} : memref - -// CHECKPARALLEL-LABEL: func @copy_view3 -// CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = memref.load {{.*}} : memref -// CHECKPARALLEL: store %[[L]], {{.*}} : memref - #accesses = [ affine_map<(i, j, k) -> (i, j)>, affine_map<(i, j, k) -> (i, j, k)>, @@ -857,8 +832,14 @@ : memref to memref %1 = memref.subview %arg1[0, %arg4] [1, %arg3] [1, 1] : memref to memref - linalg.copy(%0, %1) - : memref, memref + linalg.generic { + iterator_types = ["parallel"], + indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]} + ins(%0: memref) + outs(%1: memref) { + ^bb0(%a: i32, %b: i32): + linalg.yield %a : i32 + } return } // CHECK-LABEL: func @lower_to_loops_with_rank_reducing_subviews diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -62,14 +62,14 @@ // DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC:.*]] = memref.subview %[[fullC]]{{.*}} : memref to memref -// CHECK: linalg.copy(%[[vA]], %[[partialA]]) : memref, memref -// CHECK: linalg.copy(%[[vB]], %[[partialB]]) : memref, memref -// CHECK: linalg.copy(%[[vC]], %[[partialC]]) : memref, memref +// CHECK: memref.copy %[[vA]], %[[partialA]] : memref to memref +// CHECK: memref.copy %[[vB]], %[[partialB]] : memref to memref +// CHECK: memref.copy %[[vC]], %[[partialC]] : memref to memref // // CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]] // -// CHECK: linalg.copy(%[[partialC]], %[[vC]]) : -// CHECK: memref, +// CHECK: memref.copy %[[partialC]], %[[vC]] : +// CHECK: memref to // CHECK: memref // // CHECK: memref.dealloc %[[tmpA]] : memref<32xi8> @@ -132,14 
+132,14 @@ // DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC_f64:.*]] = memref.subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref -// CHECK: linalg.copy(%[[vA_f64]], %[[partialA_f64]]) : memref, memref -// CHECK: linalg.copy(%[[vB_f64]], %[[partialB_f64]]) : memref, memref -// CHECK: linalg.copy(%[[vC_f64]], %[[partialC_f64]]) : memref, memref +// CHECK: memref.copy %[[vA_f64]], %[[partialA_f64]] : memref to memref +// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref to memref +// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref to memref // // CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]] // -// CHECK: linalg.copy(%[[partialC_f64]], %[[vC_f64]]) : -// CHECK: memref, +// CHECK: memref.copy %[[partialC_f64]], %[[vC_f64]] : +// CHECK: memref to // CHECK: memref // // CHECK: memref.dealloc %[[tmpA_f64]] : memref<64xi8> diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir --- a/mlir/test/Dialect/Linalg/promotion_options.mlir +++ b/mlir/test/Dialect/Linalg/promotion_options.mlir @@ -24,11 +24,11 @@ // CHECK: %[[T20:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref // CHECK: %[[T21:.+]] = memref.subview %[[T20]] // CHECK: linalg.fill(%[[C42]], %[[T19]]) -// CHECK: linalg.copy(%[[T7]], %[[T19]]) +// CHECK: memref.copy %[[T7]], %[[T19]] // CHECK: linalg.fill(%[[C42]], %[[T21]]) -// CHECK: linalg.copy(%[[T17]], %[[T21]]) +// CHECK: memref.copy %[[T17]], %[[T21]] // CHECK: linalg.matmul ins(%[[T19]], %[[T12]]{{.*}} outs(%[[T21]] // CHECK-NOT: linalg.fill -// CHECK: linalg.copy(%[[T21]], %[[T17]]) +// CHECK: memref.copy %[[T21]], %[[T17]] // CHECK: memref.dealloc %[[T18]] // CHECK: memref.dealloc %[[T20]] diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -8,8 
+8,6 @@ // CHECK-DAG: #[[$id_2d:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)> // CHECK-DAG: #[[$id_1d:.*]] = affine_map<(d0, d1, d2) -> (d1)> -// CHECK-DAG: #[[$permute_0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)> -// CHECK-DAG: #[[$permute_1:.*]] = affine_map<(d0, d1, d2) -> (d2, d1, d0)> // CHECK-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK-DAG: #[[$strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> // CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> @@ -97,37 +95,6 @@ // ----- - -func @copy_view(%arg0: memref, - %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, - memref - return -} -// CHECK-LABEL: func @copy_view( -// CHECK: linalg.copy(%{{.*}}, %{{.*}}) : -// CHECK-SAME: memref, memref - -// ----- - - -func @copy_view3(%arg0: memref, - %arg1: memref) { - linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, - outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} : - memref, memref - return -} -// CHECK-LABEL: func @copy_view3( -// CHECK: %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: linalg.copy(%{{.*}}, %{{.*}}) { -// CHECK-SAME: inputPermutation = #[[$permute_0]], -// CHECK-SAME: outputPermutation = #[[$permute_1]]} : -// CHECK-SAME: memref, -// CHECK-SAME: memref - -// ----- - #accesses_0 = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> ()>, diff --git a/mlir/test/Dialect/Linalg/standard.mlir b/mlir/test/Dialect/Linalg/standard.mlir --- a/mlir/test/Dialect/Linalg/standard.mlir +++ b/mlir/test/Dialect/Linalg/standard.mlir @@ -1,12 +1,8 @@ // RUN: mlir-opt %s -convert-linalg-to-std | FileCheck %s // CHECK-DAG: #[[$map0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d2 * s2 + d1)> -// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1, d2)[s0, 
s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)> // CHECK-DAG: #[[$map6:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK-DAG: #[[$map7:.*]] = affine_map<()[s0] -> (s0)> -// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)> func @dot(%arg0: memref, %arg1: memref, @@ -30,40 +26,6 @@ // CHECK-SAME: %[[o0]], %[[o1]], %[[o2]]) : // CHECK-SAME: memref, memref, memref -func @copy(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, memref - return -} -// CHECK-LABEL: func @copy( -// CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref, -// CHECK-SAME: %[[arg1:[a-zA-z0-9]*]]: memref) { -// CHECK: %[[o0:.*]] = memref.cast %[[arg0]] : -// CHECK-SAME: memref to memref -// CHECK: %[[o1:.*]] = memref.cast %[[arg1]] : -// CHECK-SAME: memref to memref -// CHECK: call @linalg_copy_viewsxsxsxf32_viewsxsxsxf32(%[[o0]], %[[o1]]) : -// CHECK-SAME: memref, memref - -func @copy_transpose(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, - outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} - : memref, memref - return -} -// CHECK-LABEL: func @copy_transpose( -// CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref, -// CHECK-SAME: %[[arg1:[a-zA-z0-9]*]]: memref) { -// CHECK: %[[t0:.*]] = memref.transpose %[[arg0]] -// CHECK-SAME: (d0, d1, d2) -> (d0, d2, d1) : memref -// CHECK: %[[t1:.*]] = memref.transpose %[[arg1]] -// CHECK-SAME: (d0, d1, d2) -> (d2, d1, d0) : memref -// CHECK: %[[o0:.*]] = memref.cast %[[t0]] : -// CHECK-SAME: memref to memref -// CHECK: %[[o1:.*]] = memref.cast %[[t1]] : -// CHECK-SAME: memref to memref -// CHECK: call @linalg_copy_viewsxsxsxf32_viewsxsxsxf32(%[[o0]], %[[o1]]) : -// CHECK-SAME: memref, memref - #matmul_accesses = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, diff --git a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir b/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir 
deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-1d | FileCheck %s -check-prefix=CHECK-1D -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-2d | FileCheck %s -check-prefix=CHECK-2D - -func @matmul(%A: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - %B: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - %C: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>) { - linalg.matmul {__internal_linalg_transform__ = "START"} - ins(%A, %B: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - memref<1584x1584xf32, offset: 0, strides: [1584, 1]>) - outs(%C: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>) - return -} - -// CHECK-1D-LABEL:func @matmul -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32> -// -// CHECK-1D: vector.transfer_read {{.*}} : memref<8x16xf32, #{{.*}}>, vector<8x16xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32> -// CHECK-1D: vector.transfer_read {{.*}} : memref<16x12xf32, #{{.*}}>, vector<16x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32> -// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32, #{{.*}}>, vector<8x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32> -// -// CHECK-1D: vector.contract -// CHECK-1D-SAME: iterator_types = ["parallel", "parallel", "reduction"] -// CHECK-1D-SAME: : vector<8x16xf32>, vector<16x12xf32> into vector<8x12xf32> -// -// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32>, vector<8x12xf32> -// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32, #{{.*}}> - -// 
CHECK-2D-LABEL:func @matmul -// CHECK-2D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32> -// CHECK-2D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32> -// CHECK-2D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32> -// -// CHECK-2D: linalg.copy -// CHECK-2D: linalg.copy -// CHECK-2D: linalg.copy -// -// CHECK-2D: vector.contract -// CHECK-2D-SAME: iterator_types = ["parallel", "parallel", "reduction"] -// CHECK-2D-SAME: : vector<8x16xf32>, vector<16x12xf32> into vector<8x12xf32> -// -// CHECK-2D: linalg.copy diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -229,9 +229,9 @@ // CHECK: %[[v2:.*]] = memref.view %[[a2]]{{.*}} : memref<24000000xi8> to memref // CHECK: %[[l2:.*]] = memref.subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] // CHECK-SAME: memref to memref -// CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref -// CHECK: linalg.copy(%[[s1]], %[[l1]]) : memref, memref -// CHECK: linalg.copy(%[[s2]], %[[l2]]) : memref, memref +// CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref +// CHECK: memref.copy %[[s1]], %[[l1]] : memref to memref +// CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) // CHECK-SAME: outs(%[[v2]] : memref) @@ -282,8 +282,8 @@ // CHECK-NOT: memref.alloc // CHECK-NOT: memref.view // CHECK-NOT: memref.subview -// CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref -// CHECK-NOT: linalg.copy +// CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref +// CHECK-NOT: memref.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref) // CHECK-SAME: outs(%[[s2]] : memref) @@ -307,7 +307,7 @@ // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref // CHECK: %[[l0:.*]] = memref.subview 
%[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref // CHECK: linalg.fill({{.*}}, %[[v0]]) : f32, memref -// CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref +// CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref // CHECK: linalg.fill(%[[cf]], %[[v0]]) : f32, memref func @aligned_promote_fill_complex(%arg0: memref, offset: ?, strides: [?, 1]>) { @@ -330,7 +330,7 @@ // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, #[[$STRIDED_2D_u_1]]> // CHECK: linalg.fill({{.*}}, %[[v0]]) : complex, memref> -// CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, #map{{.*}}>, memref, #map{{.*}}> +// CHECK: memref.copy %[[s0]], %[[l0]] : memref, #map{{.*}}> to memref, #map{{.*}}> // CHECK: linalg.fill(%[[cc]], %[[v0]]) : complex, memref> func @tile_permute_parallel_loop(%arg0: memref, diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -212,7 +212,7 @@ func @test_vectorize_copy(%A : memref<8x16xf32>, %B : memref<8x16xf32>) { // CHECK: %[[V:.*]] = vector.transfer_read {{.*}} : memref<8x16xf32>, vector<8x16xf32> // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.copy(%A, %B) : memref<8x16xf32>, memref<8x16xf32> + memref.copy %A, %B : memref<8x16xf32> to memref<8x16xf32> return } @@ -225,7 +225,7 @@ // CHECK: %[[val:.*]] = vector.extractelement %[[V]][] : vector // CHECK: %[[VV:.*]] = vector.broadcast %[[val]] : f32 to vector // CHECK: vector.transfer_write %[[VV]], %[[B]][] : vector, memref - linalg.copy(%A, %B) : memref, memref + memref.copy %A, %B : memref to memref return } @@ -462,7 +462,7 @@ iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>) outs(%C 
: memref<4x4x4x4xf32>) { - ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): + ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg0, %arg1 : f32 %a = arith.addf %arg2, %s : f32 %b = arith.addf %arg3, %a : f32 @@ -775,7 +775,7 @@ ], iterator_types = ["parallel", "parallel", "reduction"] } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) { - ^bb0(%arg0: f32, %arg1: f32): + ^bb0(%arg0: f32, %arg1: f32): %1 = math.exp %arg0 : f32 %2 = arith.addf %1, %arg1 : f32 linalg.yield %2 : f32 @@ -811,7 +811,7 @@ ], iterator_types = ["parallel", "reduction", "reduction", "parallel"] } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) { - ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): + ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %1 = math.exp %arg0 : f32 %2 = math.exp %arg1 : f32 %3 = arith.addf %1, %2 : f32 @@ -838,7 +838,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { - ^bb0(%in0: f32, %out0: f32): + ^bb0(%in0: f32, %out0: f32): %max = arith.maxf %in0, %out0 : f32 linalg.yield %max : f32 } -> tensor<4xf32> @@ -863,7 +863,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { - ^bb0(%in0: f32, %out0: f32): + ^bb0(%in0: f32, %out0: f32): %min = arith.minf %out0, %in0 : f32 linalg.yield %min : f32 } -> tensor<4xf32> @@ -887,7 +887,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { - ^bb0(%in0: f32, %out0: f32): + ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %out0 : f32 linalg.yield %mul : f32 } -> tensor<4xf32> @@ -910,7 +910,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { - ^bb0(%in0: i1, %out0: i1): + ^bb0(%in0: i1, %out0: i1): 
%or = arith.ori %in0, %out0 : i1 linalg.yield %or : i1 } -> tensor<4xi1> @@ -933,7 +933,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { - ^bb0(%in0: i1, %out0: i1): + ^bb0(%in0: i1, %out0: i1): %and = arith.andi %in0, %out0 : i1 linalg.yield %and : i1 } -> tensor<4xi1> @@ -956,7 +956,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { - ^bb0(%in0: i1, %out0: i1): + ^bb0(%in0: i1, %out0: i1): %xor = arith.xori %in0, %out0 : i1 linalg.yield %xor : i1 } -> tensor<4xi1> @@ -1051,7 +1051,7 @@ iterator_types = ["reduction"]} ins(%arg0 : tensor<32xf32>) outs(%1 : tensor) { - ^bb0(%a: f32, %b: f32): + ^bb0(%a: f32, %b: f32): %3 = arith.addf %a, %b : f32 linalg.yield %3 : f32 } -> tensor diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s -test-vector-transfer-full-partial-split -split-input-file | FileCheck %s -// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-linalg-copy -split-input-file | FileCheck %s --check-prefix=LINALG +// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-memref-copy -split-input-file | FileCheck %s --check-prefix=LINALG // CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)> // CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)> @@ -82,7 +82,7 @@ // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] // LINALG-SAME: memref to memref // LINALG: %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1] - // LINALG: linalg.copy(%[[sv]], %[[alloc_view]]) : memref, memref + // LINALG: memref.copy %[[sv]], 
%[[alloc_view]] : memref to memref // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] : // LINALG-SAME: memref<4x8xf32> to memref // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : @@ -174,7 +174,7 @@ // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref // LINALG: %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1] - // LINALG: linalg.copy(%[[sv]], %[[alloc_view]]) : memref, memref + // LINALG: memref.copy %[[sv]], %[[alloc_view]] : memref to memref // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] : // LINALG-SAME: memref<4x8xf32> to memref // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : @@ -279,8 +279,8 @@ // LINALG-SAME: [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]] // LINALG-SAME: [1, 1] : memref<4x8xf32> to memref // LINALG: %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][0, 0] [%[[VAL_20]], %[[VAL_21]]] [1, 1] -// LINALG: linalg.copy(%[[VAL_22]], %[[DEST_VIEW]]) -// LINALG-SAME: : memref, memref +// LINALG: memref.copy %[[VAL_22]], %[[DEST_VIEW]] +// LINALG-SAME: : memref to memref // LINALG: } // LINALG: return // LINALG: } @@ -388,8 +388,8 @@ // LINALG-SAME: [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]] // LINALG-SAME: [1, 1] : memref<4x8xf32> to memref // LINALG: %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][0, 0] [%[[VAL_20]], %[[VAL_21]]] [1, 1] -// LINALG: linalg.copy(%[[VAL_22]], %[[DEST_VIEW]]) -// LINALG-SAME: : memref, memref +// LINALG: memref.copy %[[VAL_22]], %[[DEST_VIEW]] +// LINALG-SAME: : memref to memref // LINALG: } // LINALG: return // LINALG: } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir @@ -2,7 +2,7 @@ // RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 
's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \ // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \ // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \ -// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \ +// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=memref.copy register-tile-sizes=4,32 vectorize" | \ // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ // RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \ diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1194,7 +1194,7 @@ memref.dealloc %5 : memref<2xf32> scf.yield %6 : memref<2xf32> } - linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32> + memref.copy %2, %arg4 : memref<2xf32> to memref<2xf32> memref.dealloc %2 : memref<2xf32> return } @@ -1204,7 +1204,7 @@ // CHECK-NEXT: memref.dealloc // CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc // CHECK-NEXT: scf.yield %[[ALLOC2]] -// CHECK: linalg.copy(%[[ALLOC1]] +// CHECK: memref.copy %[[ALLOC1]] // CHECK-NEXT: memref.dealloc %[[ALLOC1]] // ----- diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp @@ -144,7 +144,7 @@ "Split vector transfers between slow (masked) and fast " "(unmasked) variants. 
Possible options are:\n" "\tnone: keep unsplit vector.transfer and pay the full price\n" - "\tlinalg-copy: use linalg.fill + linalg.copy for the slow path\n" + "\tmemref-copy: use linalg.fill + memref.copy for the slow path\n" "\tvector-transfers: use extra small unmasked vector.transfer for" " the slow path\n"), llvm::cl::init("none")}; @@ -167,7 +167,7 @@ "latch on:\n" "\tlinalg.matmul: anchor on linalg.matmul\n" "\tlinalg.matmul_column_major: anchor on linalg.matmul_column_major\n" - "\tlinalg.copy: anchor on linalg.copy\n" + "\tmemref.copy: anchor on memref.copy\n" "\tlinalg.fill: anchor on linalg.fill\n"), llvm::cl::init("")}; Option anchorFuncOpName{ @@ -306,7 +306,7 @@ llvm::StringSwitch( splitVectorTransfersTo.getValue()) .Case("none", vector::VectorTransferSplit::None) - .Case("linalg-copy", vector::VectorTransferSplit::LinalgCopy) + .Case("memref-copy", vector::VectorTransferSplit::LinalgCopy) .Case("vector-transfers", vector::VectorTransferSplit::VectorTransfer) .Default(vector::VectorTransferSplit::None); diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -81,7 +81,7 @@ Option testVectorTransferForwardingPatterns{ *this, "test-vector-transfer-forwarding-patterns", llvm::cl::desc( - "Test a fused pass that forwards linalg.copy to vector.transfer"), + "Test a fused pass that forwards memref.copy to vector.transfer"), llvm::cl::init(false)}; Option testGenericToVectorPattern{ *this, "test-linalg-to-vector-patterns", @@ -232,7 +232,8 @@ //===--------------------------------------------------------------------===// patterns.add( ctx, LinalgTransformationFilter(StringAttr::get(ctx, "VECTORIZE")) - .addOpFilter()); + .addOpFilter()); + patterns.add(ctx); //===--------------------------------------------------------------------===// // Linalg generic interchange 
pattern. @@ -301,7 +302,8 @@ MatmulOp::getOperationName(), ctx, LinalgVectorizationOptions(), LinalgTransformationFilter(StringAttr::get(ctx, "VEC")))); patternsVector.back().add( - ctx, LinalgTransformationFilter().addOpFilter()); + ctx, LinalgTransformationFilter().addOpFilter()); + patternsVector.back().add(ctx); } //===----------------------------------------------------------------------===// @@ -339,7 +341,7 @@ FloatAttr::get(floatType, 42.0)); b.create(src.getLoc(), cst, dst); } - b.create(src.getLoc(), src, dst); + b.create(src.getLoc(), src, dst); return success(); } @@ -546,10 +548,11 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); + auto *ctx = funcOp.getContext(); patterns.add( - funcOp.getContext(), - LinalgTransformationFilter() - .addOpFilter()); + ctx, LinalgTransformationFilter() + .addOpFilter()); + patterns.add(ctx); populatePadOpVectorizationPatterns(patterns); populateConvolutionVectorizationPatterns(patterns); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -470,9 +470,9 @@ } Option useLinalgOps{ - *this, "use-linalg-copy", + *this, "use-memref-copy", llvm::cl::desc("Split using a unmasked vector.transfer + linalg.fill + " - "linalg.copy operations."), + "memref.copy operations."), llvm::cl::init(false)}; void runOnOperation() override { MLIRContext *ctx = &getContext();