Index: mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir =================================================================== --- mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir +++ mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir @@ -1,9 +1,18 @@ -// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 \ -// RUN: -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \ +// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \ // RUN: FileCheck %s +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ +// RUN: -convert-scf-to-std -convert-vector-to-llvm | mlir-cpu-runner -e main \ +// RUN: -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \ +// RUN: FileCheck %s + +// RUN: mlir-opt %s -test-vector-to-forloop | FileCheck %s -check-prefix=TRANSFORM + + func @print_memref_f32(memref<*xf32>) func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref { @@ -19,30 +28,29 @@ return %0 : memref } -func @vector_add_cycle(%id : index, %A: memref, %B: memref, %C: memref) { - %c0 = constant 0 : index +// Large vector addf that can be broken down into a loop of smaller vector addf. +func @main() { %cf0 = constant 0.0 : f32 - %a = vector.transfer_read %A[%c0], %cf0: memref, vector<64xf32> - %b = vector.transfer_read %B[%c0], %cf0: memref, vector<64xf32> - %acc = addf %a, %b: vector<64xf32> - vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref - return -} - -// Loop over a function containinng a large add vector and distribute it so that -// each iteration of the loop process part of the vector operation. 
-func @main() { %cf1 = constant 1.0 : f32 %cf2 = constant 2.0 : f32 %c0 = constant 0 : index %c1 = constant 1 : index + %c32 = constant 32 : index %c64 = constant 64 : index %out = alloc(%c64) : memref %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref - scf.for %arg5 = %c0 to %c64 step %c1 { - call @vector_add_cycle(%arg5, %in1, %in2, %out) : (index, memref, memref, memref) -> () - } + // Check that the transformation happened correctly. + // TRANSFORM: scf.for + // TRANSFORM: vector.transfer_read {{.*}} : memref, vector<2xf32> + // TRANSFORM: vector.transfer_read {{.*}} : memref, vector<2xf32> + // TRANSFORM: %{{.*}} = addf %{{.*}}, %{{.*}} : vector<2xf32> + // TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref + // TRANSFORM: } + %a = vector.transfer_read %in1[%c0], %cf0: memref, vector<64xf32> + %b = vector.transfer_read %in2[%c0], %cf0: memref, vector<64xf32> + %acc = addf %a, %b: vector<64xf32> + vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref %converted = memref_cast %out : memref to memref<*xf32> call @print_memref_f32(%converted): (memref<*xf32>) -> () // CHECK: Unranked{{.*}}data = Index: mlir/lib/Dialect/Vector/VectorTransforms.cpp =================================================================== --- mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -2526,9 +2526,13 @@ return failure(); edsc::ScopedContext scope(rewriter, read.getLoc()); using mlir::edsc::op::operator+; + using mlir::edsc::op::operator*; using namespace mlir::edsc::intrinsics; SmallVector indices(read.indices().begin(), read.indices().end()); - indices.back() = indices.back() + extract.id(); + indices.back() = + indices.back() + + (extract.id() * + std_constant_index(extract.getResultType().getDimSize(0))); Value newRead = vector_transfer_read(extract.getType(), read.memref(), indices, read.permutation_map(), 
read.padding(), ArrayAttr()); @@ -2552,10 +2556,14 @@ return failure(); edsc::ScopedContext scope(rewriter, write.getLoc()); using mlir::edsc::op::operator+; + using mlir::edsc::op::operator*; using namespace mlir::edsc::intrinsics; SmallVector indices(write.indices().begin(), write.indices().end()); - indices.back() = indices.back() + insert.id(); + indices.back() = + indices.back() + + (insert.id() * + std_constant_index(insert.getSourceVectorType().getDimSize(0))); vector_transfer_write(insert.vector(), write.memref(), indices, write.permutation_map(), ArrayAttr()); rewriter.eraseOp(write); Index: mlir/test/Dialect/Vector/vector-distribution.mlir =================================================================== --- mlir/test/Dialect/Vector/vector-distribution.mlir +++ mlir/test/Dialect/Vector/vector-distribution.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 | FileCheck %s +// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32 -split-input-file | FileCheck %s // CHECK-LABEL: func @distribute_vector_add // CHECK-SAME: (%[[ID:.*]]: index @@ -13,6 +13,8 @@ return %0: vector<32xf32> } +// ----- + // CHECK-LABEL: func @vector_add_read_write // CHECK-SAME: (%[[ID:.*]]: index // CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32> @@ -34,12 +36,19 @@ return } -// CHECK-LABEL: func @vector_add_cycle +// ----- + +// CHECK-DAG: #[[MAP0:map[0-9]+]] = affine_map<()[s0] -> (s0 * 2)> + +// CHECK: func @vector_add_cycle // CHECK-SAME: (%[[ID:.*]]: index -// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32> -// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<64xf32>, vector<2xf32> +// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32> +// CHECK-NEXT: 
%[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32> // CHECK-NEXT: %[[ADD:.*]] = addf %[[EXA]], %[[EXB]] : vector<2xf32> -// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID]]] : vector<2xf32>, memref<64xf32> +// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]] +// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] : vector<2xf32>, memref<64xf32> // CHECK-NEXT: return func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) { %c0 = constant 0 : index @@ -51,6 +60,8 @@ return } +// ----- + // Negative test to make sure nothing is done in case the vector size is not a // multiple of multiplicity. // CHECK-LABEL: func @vector_negative_test Index: mlir/test/lib/Transforms/TestVectorTransforms.cpp =================================================================== --- mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -8,6 +8,7 @@ #include +#include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/SCF/SCF.h" @@ -156,6 +157,64 @@ } }; +struct TestVectorToLoopPatterns + : public PassWrapper { + TestVectorToLoopPatterns() = default; + TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass) {} + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + registry.insert(); + } + Option multiplicity{ + *this, "distribution-multiplicity", + llvm::cl::desc("Set the multiplicity used for distributing vector"), + llvm::cl::init(32)}; + void runOnFunction() override { + MLIRContext *ctx = &getContext(); + OwningRewritePatternList patterns; + FuncOp func = getFunction(); + func.walk([&](AddFOp op) { + // Check that the operation type can be broken down into a loop. 
+ VectorType type = op.getType().dyn_cast(); + if (!type || type.getRank() != 1 || + type.getNumElements() % multiplicity != 0) + return mlir::WalkResult::advance(); + auto filterAlloc = [](Operation *op) { + if (isa(op)) + return false; + return true; + }; + auto dependentOps = getSlice(op, filterAlloc); + // Create a loop and move instructions from the Op slice into the loop. + OpBuilder builder(op); + auto zero = builder.create( + op.getLoc(), builder.getIndexType(), + builder.getIntegerAttr(builder.getIndexType(), 0)); + auto one = builder.create( + op.getLoc(), builder.getIndexType(), + builder.getIntegerAttr(builder.getIndexType(), 1)); + auto numIter = builder.create( + op.getLoc(), builder.getIndexType(), + builder.getIntegerAttr(builder.getIndexType(), multiplicity)); + auto forOp = builder.create(op.getLoc(), zero, numIter, one); + for (Operation *it : dependentOps) { + it->moveBefore(forOp.getBody()->getTerminator()); + } + // break up the original op and let the patterns propagate. + Optional ops = distributPointwiseVectorOp( + builder, op.getOperation(), forOp.getInductionVar(), multiplicity); + if (ops.hasValue()) { + SmallPtrSet extractOp({ops->extract, ops->insert}); + op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp); + } + return mlir::WalkResult::interrupt(); + }); + patterns.insert(ctx); + populateVectorToVectorTransformationPatterns(patterns, ctx); + applyPatternsAndFoldGreedily(getFunction(), patterns); + } +}; + struct TestVectorTransferUnrollingPatterns : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { @@ -235,5 +294,8 @@ "test-vector-distribute-patterns", "Test conversion patterns to distribute vector ops in the vector " "dialect"); + PassRegistration vectorToForLoop( + "test-vector-to-forloop", + "Test conversion patterns to break up a vector op into a for loop"); } } // namespace mlir