diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -488,11 +488,6 @@
   // Compute new offsets, lengths, low padding, high padding.
   SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
   SmallVector<OpFoldResult> newLows, newHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
 
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
@@ -506,10 +501,12 @@
 
     // The new amount of low padding is `low - offset`. Except for the case
     // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
+    // low padding is zero. Also, do not add more low padding than the size of
+    // result dimension.
     //
     // Optimization: If low = 0, then newLow = 0.
-    OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    OpFoldResult newLow =
+        hasLowPad ? min(max(zero, sub(low, offset)), length) : zero;
     newLows.push_back(newLow);
 
     // Start reading the data from position `offset - low`. Since the original
@@ -554,21 +551,6 @@
     OpFoldResult newLength = sub(endLoc, newOffset);
     newLengths.push_back(newLength);
 
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (isConstantIntValue(newLength, 0)) {
-      hasZeroLen = true;
-    } else if (!hasZeroLen) {
-      Value check = b.create<arith::CmpIOp>(
-          loc, arith::CmpIPredicate::eq,
-          getValueOrCreateConstantIndexOp(b, loc, newLength),
-          getValueOrCreateConstantIndexOp(b, loc, zero));
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
     // The amount of high padding is simply the number of elements remaining,
     // so that the result has the same length as the original ExtractSliceOp.
     // As an optimization, if the original high padding is zero, then the new
@@ -595,27 +577,14 @@
     return b.create<tensor::CastOp>(loc, resultType, val);
   };
 
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // Create GenerateOp.
-    auto generateOp = b.create<tensor::GenerateOp>(
-        loc, resultType, dynDims,
-        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
-          builder.create<tensor::YieldOp>(gLoc, padValue);
-        });
-    return generateOp;
-  };
-
-  // Emit a SliceOp and a PadOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
+  // Emit a SliceOp and a PadOp.
   auto createPadOfExtractSlice = [&]() {
-    // Create pad(extract_slice(x)).
-    Value newSliceOp = b.create<tensor::ExtractSliceOp>(
-        loc, padOp.getSource(), newOffsets, newLengths, newStrides);
+    Value newSlice =
+        b.create<tensor::ExtractSliceOp>(loc, padOp.getSource(), newOffsets,
+                                         newLengths, newStrides)
+            .getResult();
     auto newPadOp = b.create<PadOp>(
-        loc, Type(), newSliceOp, newLows, newHighs,
+        loc, Type(), newSlice, newLows, newHighs,
         /*nofold=*/padOp.getNofold(),
         getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
 
@@ -627,33 +596,6 @@
     return newPadOp;
   };
 
-  // Rewrite extract_slice(pad(x)) into a GenerateOp it is statically known that
-  // the original data source x is not used.
-  if (hasZeroLen) {
-    Operation *generateOp = createGenerateOp();
-    return TilingResult{{generateOp}, {castResult(generateOp->getResult(0))}};
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid
-  // creating SliceOps with result dimensions of size 0 at runtime.
-  if (generateZeroSliceGuard && dynHasZeroLenCond) {
-    Operation *thenOp;
-    Operation *elseOp;
-    auto result = b.create<scf::IfOp>(
-        loc, dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          thenOp = createGenerateOp();
-          b.create<scf::YieldOp>(loc, castResult(thenOp->getResult(0)));
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          elseOp = createPadOfExtractSlice();
-          b.create<scf::YieldOp>(loc, castResult(elseOp->getResult(0)));
-        });
-    return TilingResult{{elseOp}, SmallVector<Value>(result->getResults())};
-  }
-
   Operation *newPadOp = createPadOfExtractSlice();
   return TilingResult{{newPadOp}, {castResult(newPadOp->getResult(0))}};
 }
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -18,9 +18,9 @@
 // CHECK-LABEL: @static_high_pad_only
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][4, 5] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[0, 0] high[2, 4]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<2x4xf32>
 
 func.func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -37,9 +37,9 @@
 // CHECK-LABEL: @static_low_pad_only
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[2, 3] high[0, 0]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<2x3xf32>
 
 func.func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -56,9 +56,9 @@
 // CHECK-LABEL: @static_low_pad_only_2
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[1, 3] high[0, 0]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<1x3xf32>
 
 func.func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -134,15 +134,9 @@
 // CHECK-NOT: tensor.pad
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: tensor.dim %[[ARG0]], %[[C0]]
-// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) {
-// CHECK: %[[GEN:.*]] = tensor.generate
-// CHECK: scf.yield %[[GEN]]
-// CHECK: } else {
-// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
-// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
-// CHECK: scf.yield %[[PADTENSOR]]
-// CHECK: }
-// CHECK: return %[[RESULT]]
+// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: return %[[PADTENSOR]]
 func.func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tensor<3x4xf32> {
   %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] {
     ^bb0(%arg1: index, %arg2: index):
@@ -159,15 +153,9 @@
 // CHECK-NOT: tensor.pad
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: tensor.dim %[[ARG0]], %[[C0]]
-// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor) {
-// CHECK: %[[GEN:.*]] = tensor.generate %[[ARG1]]
-// CHECK: scf.yield %[[GEN]]
-// CHECK: } else {
-// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
-// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
-// CHECK: scf.yield %[[PADTENSOR]]
-// CHECK: }
-// CHECK: return %[[RESULT]]
+// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: return %[[PADTENSOR]]
 func.func @dynamic_extract_size(%arg0 : tensor, %s1: index, %pad : f32) -> tensor {
   %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
@@ -180,11 +168,8 @@
 // -----
 
 // CHECK-LABEL: @dynamic_zero_low_padding
-// CHECK: scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: tensor.pad %[[SLICE]] low[0, 0]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice
+// CHECK: tensor.pad %[[SLICE]] low[0, 0]
 func.func @dynamic_zero_low_padding(%arg0 : tensor, %pad : f32,
                                     %o1 : index, %o2 : index,
                                     %s1 : index, %s2 : index)
@@ -200,11 +185,8 @@
 // -----
 
 // CHECK-LABEL: @dynamic_zero_high_padding
-// CHECK: scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice
+// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
 func.func @dynamic_zero_high_padding(%arg0 : tensor, %pad : f32,
                                      %o1 : index, %o2 : index,
                                      %s1 : index, %s2 : index)
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -273,15 +273,9 @@
 // CHECK-DAG: %[[C64:.*]] = arith.constant 64 : index
 // CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
 // CHECK: scf.for %{{.*}} = %[[C0]] to %[[C64]] step %[[C16]]
-// CHECK: %[[CMPI1:.*]] = arith.cmpi eq
 // CHECK: scf.for %{{.*}} = %[[C0]] to %[[C128]] step %[[C32]]
-// CHECK: %[[CMPI2:.*]] = arith.cmpi eq
-// CHECK: %[[HASZERO:.*]] = arith.ori %[[CMPI2]], %[[CMPI1]] : i1
-// CHECK: scf.if %[[HASZERO]]
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: tensor.extract_slice
-// CHECK: tensor.pad
+// CHECK: tensor.extract_slice
+// CHECK: tensor.pad
 // CHECK: tensor.extract_slice
 // CHECK: tensor.extract_slice
 // CHECK: linalg.generic
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -132,10 +132,7 @@
     -> tensor<20x40xf32> {
 
   // CHECK: scf.forall
-  // CHECK: scf.if
-  // CHECK: tensor.generate
-  // CHECK: else
-  // CHECK: tensor.pad {{.*}} nofold
+  // CHECK: tensor.pad {{.*}} nofold
   %0 = tensor.pad %arg0 nofold low[%low, %low] high[%high, %high] {
   ^bb0(%arg9: index, %arg10: index):
     tensor.yield %cst : f32
diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir
--- a/mlir/test/Dialect/Tensor/tiling.mlir
+++ b/mlir/test/Dialect/Tensor/tiling.mlir
@@ -14,12 +14,9 @@
 // CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor,
@@ -51,12 +48,9 @@
 // CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
 // CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[{{.*}}, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor,
@@ -85,12 +79,9 @@
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
@@ -116,13 +107,9 @@
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor to tensor<15x?xf32>
-// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
@@ -148,17 +135,9 @@
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
 // CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[R2:.*]] = scf.if
-// CHECK: %[[GEN:.*]] = tensor.generate
-// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor
-// CHECK: scf.yield %[[cast_0]] : tensor
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor
-// CHECK: scf.yield %[[cast_1]] : tensor
-// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor to tensor<14x3xf32>
-// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
 // CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
 // CHECK: return %[[RESULT]] : tensor<14x15xf32>
 
diff --git a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
@@ -23,12 +23,9 @@
 // CHECK: %[[DIM1:.+]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[DIM0]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -53,12 +50,9 @@
 // CHECK: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
 // CHECK: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[{{.*}}, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -80,12 +74,9 @@
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -105,12 +96,10 @@
 // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[C15]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: %[[CAST:.*]] = tensor.cast %[[PAD]]
+// CHECK: tensor.insert_slice %[[CAST]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[C15]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 /// Rest of the tests only check that they dont fail.
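
Note (illustration only, not part of the patch): with the zero-size guard removed, a tile that reads no source data is still lowered to pad(extract_slice(x)) with a zero-extent slice, rather than to a tensor.generate. A hand-written MLIR sketch mirroring the @static_high_pad_only test above; the function and value names are illustrative:

func.func @zero_len_tile(%src: tensor<4x5xf32>, %pad: f32) -> tensor<2x4xf32> {
  // Zero-sized slice of the original source; nothing is actually read.
  %empty = tensor.extract_slice %src[4, 5] [0, 0] [1, 1]
      : tensor<4x5xf32> to tensor<0x0xf32>
  // Every element of the 2x4 tile comes from the padding value.
  %tile = tensor.pad %empty low[0, 0] high[2, 4] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad : f32
  } : tensor<0x0xf32> to tensor<2x4xf32>
  return %tile : tensor<2x4xf32>
}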