diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -719,15 +719,9 @@
     auto sourceType = padOp.getSourceType();
 
-    // Copy of source with static shape can be vectorized.
-    if (sourceType.hasStaticShape()) {
-      auto vecType = VectorType::get(sourceType.getShape(),
-                                     sourceType.getElementType());
-      vectorizeStaticShapeSource(rewriter, padOp, fill, vecType);
+    // Try vectorizing the copy of the source.
+    if (tryVectorizeCopy(rewriter, padOp, padValue, fill).succeeded())
       return success();
-    }
-
-    // TODO: Vectorize dynamic source but static destination.
 
    // Neither the source type nor the PadTensorOp result type has a static
    // shape. Such PadTensorOps cannot be vectorized. Generate a
    // SubTensorInsertOp instead.
@@ -751,23 +745,57 @@
     return success();
   }
 
-  /// Vectorize the copying of a PadTensorOp's source that has static shape.
-  void vectorizeStaticShapeSource(PatternRewriter &rewriter, PadTensorOp padOp,
-                                  Value dest, VectorType vecType) const {
+  /// Vectorize the copying of a PadTensorOp's source. This is possible if each
+  /// dimension size is statically known in the source type or the result type
+  /// (or both).
+  LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, PadTensorOp padOp,
+                                 Value padValue, Value dest) const {
+    auto sourceType = padOp.getSourceType();
+    auto resultType = padOp.getResultType();
+
+    SmallVector<int64_t> vecShape;
+    SmallVector<bool> readInBounds;
+    SmallVector<bool> writeInBounds;
+    for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+      if (!sourceType.isDynamicDim(i)) {
+        vecShape.push_back(sourceType.getDimSize(i));
+        // Source shape is statically known: neither the read nor the write is
+        // out-of-bounds.
+        readInBounds.push_back(true);
+        writeInBounds.push_back(true);
+      } else if (!resultType.isDynamicDim(i)) {
+        // Source shape is not statically known, but result shape is. Vectorize
+        // with the size of the result shape, which may be larger than the
+        // source size.
+        vecShape.push_back(resultType.getDimSize(i));
+        // The read may be out-of-bounds because the result size could be
+        // larger than the source size.
+        readInBounds.push_back(false);
+        // The write starts at the low-pad offset, so it is out-of-bounds
+        // whenever the low padding is > 0.
+        writeInBounds.push_back(
+            isEqualConstantIntOrValue(padOp.getMixedLowPad()[i],
+                                      rewriter.getIndexAttr(0)));
+      } else {
+        // Neither the source dim nor the result dim of padOp is static.
+        // Cannot vectorize the copy.
+        return failure();
+      }
+    }
+    auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
     // Generate TransferReadOp.
     SmallVector<Value> readIndices(
         vecType.getRank(),
         rewriter.create<ConstantIndexOp>(padOp.getLoc(), 0));
     auto read = rewriter.create<vector::TransferReadOp>(
-        padOp.getLoc(), vecType, padOp.source(), readIndices);
+        padOp.getLoc(), vecType, padOp.source(), readIndices, padValue,
+        readInBounds);
 
-    // Generate TransferWriteOp. The destination dimensions may be dynamic, but
-    // the write cannot be out-of-bounds. (A large enough destination tensor is
-    // allocated in this pattern.)
+    // Generate TransferWriteOp.
     auto writeIndices = ofrToIndexValues(
         rewriter, padOp.getLoc(), padOp.getMixedLowPad());
-    SmallVector<bool> inBounds(vecType.getRank(), true);
     rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        padOp, read, dest, writeIndices, inBounds);
+        padOp, read, dest, writeIndices, writeInBounds);
+
+    return success();
   }
 };
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -515,12 +515,13 @@
 // CHECK-LABEL: func @pad_static(
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//   CHECK-DAG:   %[[C1:.*]] = constant 1 : index
+//   CHECK-DAG:   %[[C0:.*]] = constant 0 : index
+//   CHECK-DAG:   %[[C2:.*]] = constant 2 : index
 //   CHECK-DAG:   %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
 //   CHECK-DAG:   %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
 //       CHECK:   %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
-//   CHECK-DAG:   %[[DIM1:.*]] = memref.dim %[[ARG0]], %[[C1]]
-//       CHECK:   %[[RESULT:.*]] = subtensor_insert %[[ARG0]] into %2[0, 0, 2] [2, %[[DIM1]], 2] [1, 1, 1] : tensor<2x?x2xf32> into tensor<2x3x4xf32>
+//       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
 //       CHECK:   return %[[RESULT]]
 func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
   %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
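
For reference, the @pad_static test above corresponds to the following rewrite. This is an illustrative sketch assembled from the CHECK lines, not verbatim pass output; the SSA names %init, %fill, %read, and %result are invented for readability. Dimension 1 of the source is dynamic but has static size 3 in the result, so the read uses the result size and is marked out-of-bounds in that dimension, filling the excess with %pad_value. The write starts at the low pad [0, 0, 2] and is fully in-bounds because the low padding of the dynamic dimension is 0:

  // Before: pad a tensor with one dynamic source dimension to a static shape.
  %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
  ^bb0(%i: index, %j: index, %k: index):
    linalg.yield %pad_value : f32
  } : tensor<2x?x2xf32> to tensor<2x3x4xf32>

  // After vectorization: fill the destination with the pad value, then copy
  // the source on top of it with a read/write pair.
  %c0 = constant 0 : index
  %c2 = constant 2 : index
  %init = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
  %vec = vector.broadcast %pad_value : f32 to vector<2x3x4xf32>
  %fill = vector.transfer_write %vec, %init[%c0, %c0, %c0]
      : vector<2x3x4xf32>, tensor<2x3x4xf32>
  %read = vector.transfer_read %arg0[%c0, %c0, %c0], %pad_value
      {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
  %result = vector.transfer_write %read, %fill[%c0, %c0, %c2]
      {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>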