Index: mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir =================================================================== --- mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir +++ mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \ -// RUN: -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt %s -test-vector-distribute-patterns -convert-vector-to-scf \ +// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \ // RUN: FileCheck %s @@ -19,29 +19,28 @@ return %0 : memref } -func @vector_add_cycle(%id : index, %A: memref, %B: memref, %C: memref) { - %c0 = constant 0 : index +#map0 = affine_map<()[s0] -> (s0 * 2)> + +// Loop over a vector add being distributed into a loop of vec2 and make sure +// distribution is being propagated. +func @main() { %cf0 = constant 0.0 : f32 - %a = vector.transfer_read %A[%c0], %cf0: memref, vector<64xf32> - %b = vector.transfer_read %B[%c0], %cf0: memref, vector<64xf32> - %acc = addf %a, %b: vector<64xf32> - vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref - return -} - -// Loop over a function containinng a large add vector and distribute it so that -// each iteration of the loop process part of the vector operation. -func @main() { %cf1 = constant 1.0 : f32 %cf2 = constant 2.0 : f32 %c0 = constant 0 : index %c1 = constant 1 : index + %c32 = constant 32 : index %c64 = constant 64 : index %out = alloc(%c64) : memref %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref - scf.for %arg5 = %c0 to %c64 step %c1 { - call @vector_add_cycle(%arg5, %in1, %in2, %out) : (index, memref, memref, memref) -> () + scf.for %arg5 = %c0 to %c32 step %c1 { + %a = vector.transfer_read %in1[%c0], %cf0: memref, vector<64xf32> + %b = vector.transfer_read %in2[%c0], %cf0: memref, vector<64xf32> + %acc = addf %a, %b: vector<64xf32> + %ext = vector.extract_map %acc[%arg5 : 32] : vector<64xf32> to vector<2xf32> + %id = affine.apply #map0()[%arg5] + vector.transfer_write %ext, %out[%id]: vector<2xf32>, memref } %converted = memref_cast %out : memref to memref<*xf32> call @print_memref_f32(%converted): (memref<*xf32>) -> () Index: mlir/lib/Dialect/Vector/VectorTransforms.cpp =================================================================== --- mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -2474,9 +2474,13 @@ return failure(); edsc::ScopedContext scope(rewriter, read.getLoc()); using mlir::edsc::op::operator+; + using mlir::edsc::op::operator*; using namespace mlir::edsc::intrinsics; SmallVector indices(read.indices().begin(), read.indices().end()); - indices.back() = indices.back() + extract.id(); + indices.back() = + indices.back() + + (extract.id() * + std_constant_index(extract.getResultType().getDimSize(0))); Value newRead = vector_transfer_read(extract.getType(), read.memref(), indices, read.permutation_map(), read.padding(), ArrayAttr()); @@ -2498,10 +2502,14 @@ return failure(); edsc::ScopedContext scope(rewriter, write.getLoc()); using mlir::edsc::op::operator+; + using mlir::edsc::op::operator*; using namespace mlir::edsc::intrinsics; SmallVector indices(write.indices().begin(), write.indices().end()); - indices.back() = indices.back() + insert.id(); + indices.back() = + indices.back() + + (insert.id() * + std_constant_index(insert.getSourceVectorType().getDimSize(0))); vector_transfer_write(insert.vector(), write.memref(), indices, write.permutation_map(), ArrayAttr()); rewriter.eraseOp(write); Index: mlir/test/Dialect/Vector/vector-distribution.mlir =================================================================== --- mlir/test/Dialect/Vector/vector-distribution.mlir +++ mlir/test/Dialect/Vector/vector-distribution.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 | FileCheck %s +// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 -split-input-file | FileCheck %s // CHECK-LABEL: func @distribute_vector_add // CHECK-SAME: (%[[ID:.*]]: index @@ -12,6 +12,8 @@ return %0: vector<32xf32> } +// ----- + // CHECK-LABEL: func @vector_add_read_write // CHECK-SAME: (%[[ID:.*]]: index // CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32> @@ -33,12 +35,19 @@ return } -// CHECK-LABEL: func @vector_add_cycle +// ----- + +// CHECK-DAG: #[[MAP0:map[0-9]+]] = affine_map<()[s0] -> (s0 * 2)> + +// CHECK: func @vector_add_cycle // CHECK-SAME: (%[[ID:.*]]: index -// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32> -// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32> +// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32> +// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32> // CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<2xf32> -// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID]]] : vector<2xf32>, memref<64xf32> +// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] : vector<2xf32>, memref<64xf32> // CHECK-NEXT: return func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) { %c0 = constant 0 : index @@ -50,6 +59,8 @@ return } +// ----- + // Negative test to make sure nothing is done in case the vector size is not a // multiple of multiplicity. // CHECK-LABEL: func @vector_negative_test Index: mlir/test/lib/Transforms/TestVectorTransforms.cpp =================================================================== --- mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -136,20 +136,23 @@ Option multiplicity{ *this, "distribution-multiplicity", llvm::cl::desc("Set the multiplicity used for distributing vector"), - llvm::cl::init(32)}; + llvm::cl::init(1)}; void runOnFunction() override { MLIRContext *ctx = &getContext(); OwningRewritePatternList patterns; FuncOp func = getFunction(); - func.walk([&](AddFOp op) { - OpBuilder builder(op); - Optional ops = distributPointwiseVectorOp( - builder, op.getOperation(), func.getArgument(0), multiplicity); - if (ops.hasValue()) { - SmallPtrSet extractOp({ops->extract}); - op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp); - } - }); + if (multiplicity > 1) { + func.walk([&](AddFOp op) { + OpBuilder builder(op); + Optional ops = distributPointwiseVectorOp( + builder, op.getOperation(), func.getArgument(0), multiplicity); + if (ops.hasValue()) { + SmallPtrSet extractOp({ops->extract}); + op.getResult().replaceAllUsesExcept(ops->insert.getResult(), + extractOp); + } + }); + } patterns.insert(ctx); populateVectorToVectorTransformationPatterns(patterns, ctx); applyPatternsAndFoldGreedily(getFunction(), patterns);