diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -179,6 +179,9 @@ LLVM_DEBUG(llvm::interleaveComma(canonicalVecShape, llvm::dbgs())); LLVM_DEBUG(llvm::dbgs() << "\n"); + if (ShapedType::isDynamicShape(canonicalVecShape)) + return failure(); + // Initialize iteration space static sizes. initIterSpaceStaticSizes(linalgOp); @@ -187,8 +190,6 @@ if (failed(precomputeIterSpaceDynamicSizes(rewriter, linalgOp))) return failure(); - if (ShapedType::isDynamicShape(canonicalVecShape)) - return failure(); return success(); } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -1750,3 +1750,34 @@ transform.structured.masked_vectorize %0 vector_sizes [4, 8] } +// ----- + +// This is a regression test. This IR cannot be vectorized, but +// structured.vectorize should nevertheless succeed. + +#map = affine_map<(d0) -> (d0)> +// CHECK-LABEL: @not_vectorizable +func.func @not_vectorizable(%arg0: tensor<1x?xf32>, %arg1: index, %arg2: index, %arg3: index) -> tensor<1x128xf32> { + %0 = tensor.empty() : tensor<1x128xf32> + %1 = scf.for %arg5 = %arg2 to %arg1 step %arg3 iter_args(%arg6 = %0) -> (tensor<1x128xf32>) { + %extracted_slice = tensor.extract_slice %arg6[0, 0] [1, %arg1] [1, 1] : tensor<1x128xf32> to tensor + %expanded = tensor.expand_shape %extracted_slice [[0, 1]] : tensor into tensor<1x?xf32> + %extracted_slice_0 = tensor.extract_slice %arg0[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor + %extracted_slice_1 = tensor.extract_slice %expanded[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_0 : tensor) outs(%extracted_slice_1 : tensor) { + ^bb0(%in: f32, %out: f32): + %3 = arith.addf %in, %out : f32 + linalg.yield %3 : f32 + } -> tensor + %inserted_slice = tensor.insert_slice %2 into %expanded[0, %arg3] [1, %arg2] [1, 1] : tensor into tensor<1x?xf32> + %collapsed = tensor.collapse_shape %inserted_slice [[0, 1]] : tensor<1x?xf32> into tensor + %inserted_slice_2 = tensor.insert_slice %collapsed into %arg6[0, 0] [1, %arg1] [1, 1] : tensor into tensor<1x128xf32> + scf.yield %inserted_slice_2 : tensor<1x128xf32> + } + return %1 : tensor<1x128xf32> +} +transform.sequence failures(propagate) { +^bb0(%arg0: !pdl.operation): + %0 = transform.structured.match ops{["func.func"]} in %arg0 + %1 = transform.structured.vectorize %0 +}