diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -93,25 +93,6 @@
 /// (boundsMap = affine.map<() -> (42)>)
 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
 
-/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
-/// fold it by adding the offsets.
-///
-/// Example:
-/// ```
-/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
-/// tensor<3x32xf32>
-/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
-/// tensor<3x4xf32>
-/// ```
-/// folds into:
-/// ```
-/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
-/// tensor<3x4xf32>
-/// ```
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-
 /// Create a tensor::PadOp that pads `source` to the size of the statically
 /// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target
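For reference, here is the fold that goes away, sketched with the values from the removed doc comment: after this patch the slice chain is emitted as-is, and any merging is deferred to a separate cleanup.

```mlir
// Emitted after this patch (unfolded chain):
%0 = tensor.extract_slice %arg0[3, 4] [3, 32] [1, 1]
    : tensor<64x64xf32> to tensor<3x32xf32>
%1 = tensor.extract_slice %0[0, 5] [3, 4] [1, 1]
    : tensor<3x32xf32> to tensor<3x4xf32>

// Emitted before this patch (offsets added elementwise: [3+0, 4+5]):
//   %1 = tensor.extract_slice %arg0[3, 9] [3, 4] [1, 1]
//       : tensor<64x64xf32> to tensor<3x4xf32>
```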
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -346,48 +346,6 @@
   return *std::min_element(constantBounds.begin(), constantBounds.end());
 }
 
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
-  assert(source && "expect source to be nonzero");
-
-  // Do not fold if the producer is not an ExtractSliceOp.
-  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
-  if (!producerOp)
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Do not fold if the producer is rank reducing or if there are any non-unit
-  // strides. Supporting non-unit strides complicates the offset computation
-  // since the consumer offsets need to be multiplied by the producer strides.
-  // TODO: support non-unit strides once there are use cases.
-  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
-  allStrides.append(strides.begin(), strides.end());
-  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
-    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
-  });
-  if (hasNonUnitStride ||
-      producerOp.getSourceType().getRank() !=
-          producerOp.getResult().getType().cast<ShapedType>().getRank())
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Fold the producer by adding the offests and extracting the slice directly
-  // from the producer source tensor.
-  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
-  AffineExpr dim1, dim2;
-  bindDims(b.getContext(), dim1, dim2);
-  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
-    SmallVector<Value> offsetValues = {
-        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
-        getValueOrCreateConstantIndexOp(b, loc, en.value())};
-    foldedOffsets[en.index()] =
-        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
-  }
-  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
-                                          foldedOffsets, sizes, strides);
-}
-
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold) {
   // Exit if `source` is not defined by an ExtractSliceOp.
@@ -777,8 +735,8 @@
               sliceParams.sizes, sliceParams.strides);
         })
         .Case([&](RankedTensorType) {
-          return makeComposedExtractSliceOp(
-              builder, loc, valueToTile, sliceParams.offsets,
+          return builder.create<tensor::ExtractSliceOp>(
+              loc, valueToTile, sliceParams.offsets,
              sliceParams.sizes, sliceParams.strides);
         })
        .Default([](ShapedType) -> Operation * {
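Note how the removed implementation composed offsets: one `makeComposedAffineApply` over `dim1 + dim2` per dimension. With a constant producer offset this materializes maps like `(d0) -> (d0 + 3)`, which is exactly where the `#[[MAP1]]`/`#[[MAP2]]` check lines deleted from tile-tensors.mlir below came from. A sketch of the composed form, with an illustrative induction variable `%iv` and made-up sizes:

```mlir
// What the helper used to build for a producer offset of 3 along d0:
%off = affine.apply affine_map<(d0) -> (d0 + 3)>(%iv)
%t0 = tensor.extract_slice %arg0[%off, 4] [2, 3] [1, 1]
    : tensor<?x128xf32> to tensor<2x3xf32>
```

Without the helper, tiling slices the producer's result at `%iv` directly, so no such maps are created.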
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -47,13 +47,16 @@
   // The canonicalizer is able to recover static shapes of for linalg.generic
   // instances, use those to differentiate the quadrants.
 
+  // CHECK: %[[SLICE_1_IN:.+]] = tensor.extract_slice %[[IN]][0, 0] [4, 34] [1, 1]
   // CHECK: %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
   // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
+  // CHECK: %[[OUTSLICE_1_IN:.+]] = tensor.extract_slice %[[SLICE_1_IN]][%[[I1]], 0] [2, 34] [1, 1]
   // CHECK: %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
-  // CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
+  // CHECK: %[[SLICE_2_IN:.+]] = tensor.extract_slice %[[OUTSLICE_1_IN]][0, 0] [2, 16] [1, 1]
+  // CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
   // CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-  // CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
+  // CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
   // CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
diff --git a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
--- a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -23,11 +23,13 @@
 // CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
 // CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+// CHECK:   %[[OUTSLICEA:.+]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:   %[[OUTSLICEB:.+]] = tensor.extract_slice %{{.*}}[0, %{{.*}}] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
 // CHECK:   %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
 // CHECK:   %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
-// CHECK:     %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK:     %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:     %[[sTA:.*]] = tensor.extract_slice %[[OUTSLICEA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK:     %[[sTB:.*]] = tensor.extract_slice %[[OUTSLICEB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:     %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 // CHECK:     %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:   outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -77,8 +77,6 @@
 // -----
 
 // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (d0 + 3)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0) -> (d0 + 4)>
 
 // CHECK: fold_extract_slice
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>
@@ -93,15 +91,15 @@
   %0 = tensor.dim %arg1, %c0 : tensor<?x42xf32>
   %1 = tensor.extract_slice %arg0[3, 4] [%0, 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
 
+  // CHECK: %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
+
   // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
   // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
 
-  // Fold the existing extract slice op into the one created by the tiling.
+  // The extract slice op created by the tiling now takes a slice of the existing one.
   // CHECK: %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
-  // CHECK: %[[OFF0:.*]] = affine.apply #[[MAP1]](%[[IV0]]
-  // CHECK: %[[OFF1:.*]] = affine.apply #[[MAP2]](%[[IV1]]
-  // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-  // CHECK-SAME: %[[OFF0]], %[[OFF1]]
+  // CHECK: %[[T0:.*]] = tensor.extract_slice %[[E]]
+  // CHECK-SAME: %[[IV0]], %[[IV1]]
   // CHECK-SAME: %[[SIZE0]], 3
   // CHECK-SAME: 1, 1
   // CHECK: {{.*}} = linalg.generic {{.*}} ins(%[[T0]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -66,11 +66,12 @@
 // CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 // CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 // CHECK:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
-// CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+// CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
+// CHECK:     %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
+// CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
 // CHECK:     %[[C4:.+]] = arith.constant 4 : index
 // CHECK:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
-// CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
+// CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK:       %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
 // CHECK:       linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor<?x?x?xf32>) outs(%[[OUT_SLICE2]] : tensor<?x?xf32>)
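The test updates so far all follow the same pattern: each loop level now re-slices the slice made by the enclosing level, instead of slicing the original operand at a composed offset. Schematically (names and shapes are illustrative, not taken from any single test):

```mlir
// Outer loop: slice the operand once per iteration...
%in0 = tensor.extract_slice %input[%iv0, 0] [4, 34] [1, 1]
    : tensor<16x34xf32> to tensor<4x34xf32>
// ...inner loop: re-slice that slice using loop-local offsets.
%in1 = tensor.extract_slice %in0[0, %iv1] [2, 8] [1, 1]
    : tensor<4x34xf32> to tensor<2x8xf32>
```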
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -153,14 +153,14 @@
   // CHECK: %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
-  // Note that `extract_slice` taking a slice from another `extract_slice` result
-  // is folded to use the operand of the first `extract_slice`.
+  // Note that `extract_slice` taking a slice from another `extract_slice`
+  // result is no longer folded to use the operand of the first `extract_slice`.
-  // CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
-  // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
+  // CHECK: %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]
+  // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
   // CHECK: %[[RES_21:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_21]] : tensor<6x16xf32>)
   // CHECK-SAME:   outs(%[[OUT_21]] : tensor<6x16xf32>)
   // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
   //
-  // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
+  // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
   // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
   // CHECK: %[[RES_22:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_22]] : tensor<6x18xf32>)
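The composition itself is not lost: consecutive `tensor.extract_slice` ops can still be merged after tiling as a separate step, for example via the tensor dialect's merge patterns (`tensor::populateMergeConsecutiveInsertExtractSlicePatterns`), assuming that is the intended cleanup path. The change is about when folding happens, at op-creation time versus in a later rewrite, not whether it can happen at all.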