Index: mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
===================================================================
--- mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \
-// RUN: -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
+// RUN: mlir-opt %s -test-vector-distribute-patterns -convert-vector-to-scf \
+// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
@@ -19,19 +19,10 @@
   return %0 : memref<?xf32>
 }
 
-func @vector_add_cycle(%id : index, %A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
-  %c0 = constant 0 : index
+// Loop over a vector add being distributed into a loop of vec2 and make sure
+// distribution is being propagated.
+func @main() {
   %cf0 = constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0], %cf0: memref<?xf32>, vector<64xf32>
-  %b = vector.transfer_read %B[%c0], %cf0: memref<?xf32>, vector<64xf32>
-  %acc = addf %a, %b: vector<64xf32>
-  vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<?xf32>
-  return
-}
-
-// Loop over a function containinng a large add vector and distribute it so that
-// each iteration of the loop process part of the vector operation.
-func @main() {
   %cf1 = constant 1.0 : f32
   %cf2 = constant 2.0 : f32
   %c0 = constant 0 : index
@@ -41,7 +32,12 @@
   %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
   %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
   scf.for %arg5 = %c0 to %c64 step %c1 {
-    call @vector_add_cycle(%arg5, %in1, %in2, %out) : (index, memref<?xf32>, memref<?xf32>, memref<?xf32>) -> ()
+    %a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
+    %b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
+    %acc = addf %a, %b: vector<64xf32>
+    %ext = vector.extract_map %acc[%arg5 : 32] : vector<64xf32> to vector<2xf32>
+    %ins = vector.insert_map %ext, %arg5, 32 : vector<2xf32> to vector<64xf32>
+    vector.transfer_write %ins, %out[%c0]: vector<64xf32>, memref<?xf32>
   }
   %converted = memref_cast %out : memref<?xf32> to memref<*xf32>
   call @print_memref_f32(%converted): (memref<*xf32>) -> ()
Index: mlir/test/lib/Transforms/TestVectorTransforms.cpp
===================================================================
--- mlir/test/lib/Transforms/TestVectorTransforms.cpp
+++ mlir/test/lib/Transforms/TestVectorTransforms.cpp
@@ -136,20 +136,23 @@
   Option<int32_t> multiplicity{
       *this, "distribution-multiplicity",
       llvm::cl::desc("Set the multiplicity used for distributing vector"),
-      llvm::cl::init(32)};
+      llvm::cl::init(1)};
   void runOnFunction() override {
     MLIRContext *ctx = &getContext();
     OwningRewritePatternList patterns;
     FuncOp func = getFunction();
-    func.walk([&](AddFOp op) {
-      OpBuilder builder(op);
-      Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
-          builder, op.getOperation(), func.getArgument(0), multiplicity);
-      if (ops.hasValue()) {
-        SmallPtrSet<Operation *, 1> extractOp({ops->extract});
-        op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
-      }
-    });
+    if (multiplicity > 1) {
+      func.walk([&](AddFOp op) {
+        OpBuilder builder(op);
+        Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
+            builder, op.getOperation(), func.getArgument(0), multiplicity);
+        if (ops.hasValue()) {
+          SmallPtrSet<Operation *, 1> extractOp({ops->extract});
+          op.getResult().replaceAllUsesExcept(ops->insert.getResult(),
+                                              extractOp);
+        }
+      });
+    }
     patterns.insert<PointwiseExtractPattern>(ctx);
     populateVectorToVectorTransformationPatterns(patterns, ctx);
     applyPatternsAndFoldGreedily(getFunction(), patterns);
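
Note on the intended behavior: with `distribution-multiplicity` now defaulting to 1, the pass no longer synthesizes the extract/insert pair itself; the test IR carries explicit `vector.extract_map`/`vector.insert_map` ops and only the propagation patterns run. A minimal sketch of the loop body one would expect after propagation, assuming the slice offset is computed as id * 2 (64 elements / multiplicity 32); `%off` is a hypothetical SSA name, the others reuse the test above. The affine.apply is why `-lower-affine` appears in the RUN pipeline:

  scf.for %arg5 = %c0 to %c64 step %c1 {
    // Each iteration reads/writes only its 2-element slice at offset %arg5 * 2.
    %off = affine.apply affine_map<(d0) -> (d0 * 2)>(%arg5)
    %a = vector.transfer_read %in1[%off], %cf0: memref<?xf32>, vector<2xf32>
    %b = vector.transfer_read %in2[%off], %cf0: memref<?xf32>, vector<2xf32>
    %acc = addf %a, %b: vector<2xf32>
    vector.transfer_write %acc, %out[%off]: vector<2xf32>, memref<?xf32>
  }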