diff --git a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp --- a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp @@ -71,8 +71,22 @@ /// Given a vector transfer op, calculate which dimension of the `source` /// memref should be unpacked in the next application of TransferOpConversion. template -static int64_t unpackedDim(OpTy xferOp) { - return xferOp.getShapedType().getRank() - xferOp.getVectorType().getRank(); +static unsigned unpackedDim(OpTy xferOp) { + auto map = xferOp.permutation_map(); + // TODO: Handle broadcast + auto expr = map.getResult(0).template dyn_cast(); + assert(expr && "Expected AffineDimExpr in permutation map result"); + return expr.getPosition(); +} + +/// Compute the permutation map for the new (N-1)-D vector transfer op. This +/// map is identical to the current permutation map, but the first result is +/// omitted. +template +static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) { + auto map = xferOp.permutation_map(); + return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(), + builder.getContext()); } /// Calculate the indices for the new vector transfer op. @@ -93,8 +107,8 @@ indices[dim] = adaptor.indices()[dim] + iv; } -static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc, - Value value) { +static void maybeYieldValue( + bool hasRetVal, OpBuilder builder, Location loc, Value value) { if (hasRetVal) { builder.create(loc, value); } else { @@ -124,7 +138,7 @@ /// `resultTypes`. template static Value generateInBoundsCheck( - OpTy xferOp, Value iv, OpBuilder &builder, int64_t dim, + OpTy xferOp, Value iv, OpBuilder &builder, unsigned dim, TypeRange resultTypes, function_ref inBoundsCase, function_ref outOfBoundsCase = nullptr) { @@ -136,19 +150,15 @@ auto cond = std_cmpi_sgt(memrefDim.value, memrefIdx); auto check = builder.create( xferOp.getLoc(), resultTypes, cond, - /*thenBuilder=*/ - [&](OpBuilder &builder, Location loc) { - maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc)); - }, - /*elseBuilder=*/ - [&](OpBuilder &builder, Location loc) { - if (outOfBoundsCase) { - maybeYieldValue(hasRetVal, builder, loc, - outOfBoundsCase(builder, loc)); - } else { - builder.create(loc); - } - }); + /*thenBuilder=*/[&](OpBuilder &builder, Location loc) { + maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc)); + }, /*elseBuilder=*/[&](OpBuilder &builder, Location loc) { + if (outOfBoundsCase) { + maybeYieldValue(hasRetVal, builder, loc, outOfBoundsCase(builder, loc)); + } else { + builder.create(loc); + } + }); return hasRetVal ? check.getResult(0) : Value(); } @@ -166,15 +176,13 @@ function_ref outOfBoundsCase = nullptr) { generateInBoundsCheck( xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(), - /*inBoundsCase=*/ - [&](OpBuilder &builder, Location loc) { + /*inBoundsCase=*/[&](OpBuilder &builder, Location loc) { inBoundsCase(builder, loc); return Value(); }, - /*outOfBoundsCase=*/ - [&](OpBuilder &builder, Location loc) { + /*outOfBoundsCase=*/[&](OpBuilder &builder, Location loc) { if (outOfBoundsCase) - outOfBoundsCase(builder, loc); + outOfBoundsCase(builder, loc); return Value(); }); } @@ -182,7 +190,7 @@ /// Given an ArrayAttr, return a copy where the first element is dropped. static ArrayAttr dropFirstElem(PatternRewriter &rewriter, ArrayAttr attr) { if (!attr) - return attr; + return attr; return ArrayAttr::get(rewriter.getContext(), attr.getValue().drop_front()); } @@ -191,13 +199,14 @@ struct Strategy; /// Code strategy for vector TransferReadOp. -template <> +template<> struct Strategy { /// Find the StoreOp that is used for writing the current TransferReadOp's /// result to the temporary buffer allocation. static memref::StoreOp getStoreOp(TransferReadOp xferOp) { assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp"); - auto storeOp = dyn_cast((*xferOp->use_begin()).getOwner()); + auto storeOp = dyn_cast( + (*xferOp->use_begin()).getOwner()); assert(storeOp && "Expected TransferReadOp result used by StoreOp"); return storeOp; } @@ -215,7 +224,7 @@ /// Retrieve the indices of the current StoreOp. static void getStoreIndices(TransferReadOp xferOp, - SmallVector &indices) { + SmallVector &indices) { auto storeOp = getStoreOp(xferOp); auto prevIndices = memref::StoreOpAdaptor(storeOp).indices(); indices.append(prevIndices.begin(), prevIndices.end()); @@ -258,24 +267,25 @@ auto bufferType = buffer.getType().dyn_cast(); auto vecType = bufferType.getElementType().dyn_cast(); - auto map = getTransferMinorIdentityMap(xferOp.getShapedType(), vecType); auto inBoundsAttr = dropFirstElem(rewriter, xferOp.in_boundsAttr()); - auto newXfer = vector_transfer_read(vecType, xferOp.source(), xferIndices, - AffineMapAttr::get(map), - xferOp.padding(), Value(), inBoundsAttr) - .value; + auto newXfer = + vector_transfer_read( + vecType, xferOp.source(), xferIndices, + AffineMapAttr::get(unpackedPermutationMap(xferOp, rewriter)), + xferOp.padding(), Value(), inBoundsAttr) + .value; if (vecType.getRank() > kTargetRank) - newXfer.getDefiningOp()->setAttr(kPassLabel, rewriter.getUnitAttr()); + newXfer.getDefiningOp()->setAttr(kPassLabel, rewriter.getUnitAttr()); memref_store(newXfer, buffer, storeIndices); } /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write /// padding value to the temporary buffer. - static void handleOutOfBoundsDim(PatternRewriter &rewriter, - TransferReadOp xferOp, Value buffer, - Value iv) { + static void handleOutOfBoundsDim( + PatternRewriter &rewriter, TransferReadOp xferOp, Value buffer, + Value iv) { SmallVector storeIndices; getStoreIndices(xferOp, storeIndices); storeIndices.push_back(iv); @@ -294,7 +304,7 @@ }; /// Codegen strategy for vector TransferWriteOp. -template <> +template<> struct Strategy { /// Find the temporary buffer allocation. All labeled TransferWriteOps are /// used like this, where %buf is either the buffer allocation or a type cast @@ -337,20 +347,20 @@ auto vec = memref_load(buffer, loadIndices); auto vecType = vec.value.getType().dyn_cast(); - auto map = getTransferMinorIdentityMap(xferOp.getShapedType(), vecType); auto inBoundsAttr = dropFirstElem(rewriter, xferOp.in_boundsAttr()); - auto newXfer = - vector_transfer_write(Type(), vec, xferOp.source(), xferIndices, - AffineMapAttr::get(map), Value(), inBoundsAttr); + auto newXfer = vector_transfer_write( + Type(), vec, xferOp.source(), xferIndices, + AffineMapAttr::get(unpackedPermutationMap(xferOp, rewriter)), Value(), + inBoundsAttr); if (vecType.getRank() > kTargetRank) - newXfer.op->setAttr(kPassLabel, rewriter.getUnitAttr()); + newXfer.op->setAttr(kPassLabel, rewriter.getUnitAttr()); } /// Handle out-of-bounds accesses on the to-be-unpacked dimension. - static void handleOutOfBoundsDim(PatternRewriter &rewriter, - TransferWriteOp xferOp, Value buffer, - Value iv) {} + static void handleOutOfBoundsDim( + PatternRewriter &rewriter, TransferWriteOp xferOp, Value buffer, + Value iv) {} /// Cleanup after rewriting the op. static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) { @@ -361,13 +371,11 @@ template LogicalResult checkPrepareXferOp(OpTy xferOp) { if (xferOp->hasAttr(kPassLabel)) - return failure(); + return failure(); if (xferOp.getVectorType().getRank() <= kTargetRank) - return failure(); + return failure(); if (xferOp.mask()) - return failure(); - if (!xferOp.permutation_map().isMinorIdentity()) - return failure(); + return failure(); return success(); } @@ -392,7 +400,8 @@ /// memref.store %1, %0[] : memref> /// %vec = memref.load %0[] : memref> /// ``` -struct PrepareTransferReadConversion : public OpRewritePattern { +struct PrepareTransferReadConversion + : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(TransferReadOp xferOp, @@ -481,7 +490,7 @@ LogicalResult matchAndRewrite(OpTy xferOp, PatternRewriter &rewriter) const override { if (!xferOp->hasAttr(kPassLabel)) - return failure(); + return failure(); ScopedContext scope(rewriter, xferOp.getLoc()); // How the buffer can be found depends on OpTy. @@ -491,20 +500,16 @@ auto casted = vector_type_cast(castedType, buffer); auto lb = std_constant_index(0).value; - auto ub = - std_constant_index(castedType.getDimSize(castedType.getRank() - 1)) - .value; + auto ub = std_constant_index( + castedType.getDimSize(castedType.getRank() - 1)).value; affineLoopBuilder(lb, ub, 1, [&](Value iv) { generateInBoundsCheck( xferOp, iv, rewriter, unpackedDim(xferOp), - /*inBoundsCase=*/ - [&](OpBuilder & /*b*/, Location loc) { - Strategy::rewriteOp(rewriter, xferOp, casted, iv); - }, - /*outOfBoundsCase=*/ - [&](OpBuilder & /*b*/, Location loc) { - Strategy::handleOutOfBoundsDim(rewriter, xferOp, casted, iv); - }); + /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) { + Strategy::rewriteOp(rewriter, xferOp, casted, iv); + }, /*outOfBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) { + Strategy::handleOutOfBoundsDim(rewriter, xferOp, casted, iv); + }); }); Strategy::cleanup(rewriter, xferOp); @@ -541,25 +546,25 @@ /// Codegen strategy for TransferReadOp. template <> struct Strategy1d { - static void generateForLoopBody(OpBuilder &builder, Location loc, - TransferReadOp xferOp, Value iv, - ValueRange loopState) { + static void generateForLoopBody( + OpBuilder &builder, Location loc, TransferReadOp xferOp, Value iv, + ValueRange loopState) { SmallVector indices; auto dim = get1dMemrefIndices(xferOp, iv, indices); - auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv); + auto ivI32 = std_index_cast( + IntegerType::get(builder.getContext(), 32), iv); auto vec = loopState[0]; // In case of out-of-bounds access, leave `vec` as is (was initialized with // padding value). auto nextVec = generateInBoundsCheck( xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()), - /*inBoundsCase=*/ - [&](OpBuilder & /*b*/, Location loc) { - auto val = memref_load(xferOp.source(), indices); - return vector_insert_element(val, vec, ivI32.value).value; - }, - /*outOfBoundsCase=*/ - [&](OpBuilder & /*b*/, Location loc) { return vec; }); + /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) { + auto val = memref_load(xferOp.source(), indices); + return vector_insert_element(val, vec, ivI32.value).value; + }, /*outOfBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) { + return vec; + }); builder.create(loc, nextVec); } @@ -572,24 +577,27 @@ /// Codegen strategy for TransferWriteOp. template <> struct Strategy1d { - static void generateForLoopBody(OpBuilder &builder, Location loc, - TransferWriteOp xferOp, Value iv, - ValueRange /*loopState*/) { + static void generateForLoopBody( + OpBuilder &builder, Location loc, TransferWriteOp xferOp, Value iv, + ValueRange /*loopState*/) { SmallVector indices; auto dim = get1dMemrefIndices(xferOp, iv, indices); - auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv); + auto ivI32 = std_index_cast( + IntegerType::get(builder.getContext(), 32), iv); // Nothing to do in case of out-of-bounds access. generateInBoundsCheck( xferOp, iv, builder, dim, - /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) { - auto val = vector_extract_element(xferOp.vector(), ivI32.value); - memref_store(val, xferOp.source(), indices); - }); + /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) { + auto val = vector_extract_element(xferOp.vector(), ivI32.value); + memref_store(val, xferOp.source(), indices); + }); builder.create(loc); } - static Value initialLoopState(TransferWriteOp xferOp) { return Value(); } + static Value initialLoopState(TransferWriteOp xferOp) { + return Value(); + } }; /// Lower a 1D vector transfer op that operates on a dimension different from @@ -623,11 +631,11 @@ auto map = xferOp.permutation_map(); if (xferOp.getVectorType().getRank() != 1) - return failure(); - if (map.isMinorIdentity()) // Handled by ConvertVectorToLLVM - return failure(); + return failure(); + if (map.isMinorIdentity()) // Handled by ConvertVectorToLLVM + return failure(); if (xferOp.mask()) - return failure(); + return failure(); // Loop bounds, step, state... auto vecType = xferOp.getVectorType(); @@ -640,16 +648,16 @@ rewriter.replaceOpWithNewOp( xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(), [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) { - ScopedContext nestedScope(builder, loc); - Strategy1d::generateForLoopBody(builder, loc, xferOp, iv, - loopState); - }); + ScopedContext nestedScope(builder, loc); + Strategy1d::generateForLoopBody( + builder, loc, xferOp, iv, loopState); + }); return success(); } }; -} // namespace +} // namespace namespace mlir { @@ -657,10 +665,13 @@ RewritePatternSet &patterns) { patterns.add, - TransferOpConversion, - Strided1dTransferOpConversion, - Strided1dTransferOpConversion>( - patterns.getContext()); + TransferOpConversion>(patterns.getContext()); + + if (kTargetRank == 1) { + patterns.add, + Strided1dTransferOpConversion>( + patterns.getContext()); + } } struct ConvertProgressiveVectorToSCFPass @@ -672,8 +683,9 @@ } }; -} // namespace mlir +} // namespace mlir -std::unique_ptr mlir::createProgressiveConvertVectorToSCFPass() { +std::unique_ptr +mlir::createProgressiveConvertVectorToSCFPass() { return std::make_unique(); } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -17,6 +17,16 @@ return } +func @transfer_read_2d_transposed( + %A : memref, %base1: index, %base2: index) { + %fm42 = constant -42.0: f32 + %f = vector.transfer_read %A[%base1, %base2], %fm42 + {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : + memref, vector<4x9xf32> + vector.print %f: vector<4x9xf32> + return +} + func @transfer_write_2d(%A : memref, %base1: index, %base2: index) { %fn1 = constant -1.0 : f32 %vf0 = splat %fn1 : vector<1x4xf32> @@ -53,12 +63,20 @@ // On input, memory contains [[ 0, 1, 2, ...], [10, 11, 12, ...], ...] // Read shifted by 2 and pad with -42: call @transfer_read_2d(%A, %c1, %c2) : (memref, index, index) -> () + // Same as above, but transposed + call @transfer_read_2d_transposed(%A, %c1, %c2) + : (memref, index, index) -> () // Write into memory shifted by 3 call @transfer_write_2d(%A, %c3, %c1) : (memref, index, index) -> () // Read shifted by 0 and pad with -42: call @transfer_read_2d(%A, %c0, %c0) : (memref, index, index) -> () + // Same as above, but transposed + call @transfer_read_2d_transposed(%A, %c0, %c0) + : (memref, index, index) -> () return } // CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) +// CHECK: ( ( 12, 22, -42, -42, -42, -42, -42, -42, -42 ), ( 13, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) // CHECK: ( ( 0, 1, 2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) +// CHECK: ( ( 0, 10, 20, -42, -42, -42, -42, -42, -42 ), ( 1, 11, 21, -42, -42, -42, -42, -42, -42 ), ( 2, 12, 22, -42, -42, -42, -42, -42, -42 ), ( 3, 13, 23, -42, -42, -42, -42, -42, -42 ) ) diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir @@ -19,6 +19,16 @@ return } +func @transfer_read_3d_transposed(%A : memref, + %o: index, %a: index, %b: index, %c: index) { + %fm42 = constant -42.0: f32 + %f = vector.transfer_read %A[%o, %a, %b, %c], %fm42 + {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1)>} + : memref, vector<3x5x3xf32> + vector.print %f: vector<3x5x3xf32> + return +} + func @transfer_write_3d(%A : memref, %o: index, %a: index, %b: index, %c: index) { %fn1 = constant -1.0 : f32 @@ -66,8 +76,11 @@ : (memref, index, index, index, index) -> () call @transfer_read_3d(%A, %c0, %c0, %c0, %c0) : (memref, index, index, index, index) -> () + call @transfer_read_3d_transposed(%A, %c0, %c0, %c0, %c0) + : (memref, index, index, index, index) -> () return } // CHECK: ( ( ( 0, 0, -42 ), ( 2, 3, -42 ), ( 4, 6, -42 ), ( 6, 9, -42 ), ( -42, -42, -42 ) ), ( ( 20, 30, -42 ), ( 22, 33, -42 ), ( 24, 36, -42 ), ( 26, 39, -42 ), ( -42, -42, -42 ) ) ) // CHECK: ( ( ( 0, 0, -42 ), ( 2, -1, -42 ), ( 4, -1, -42 ), ( 6, -1, -42 ), ( -42, -42, -42 ) ), ( ( 20, 30, -42 ), ( 22, -1, -42 ), ( 24, -1, -42 ), ( 26, -1, -42 ), ( -42, -42, -42 ) ) ) +// CHECK: ( ( ( 0, 20, 40 ), ( 0, 20, 40 ), ( 0, 20, 40 ), ( 0, 20, 40 ), ( 0, 20, 40 ) ), ( ( 0, 30, 60 ), ( 0, 30, 60 ), ( 0, 30, 60 ), ( 0, 30, 60 ), ( 0, 30, 60 ) ), ( ( -42, -42, -42 ), ( -42, -42, -42 ), ( -42, -42, -42 ), ( -42, -42, -42 ), ( -42, -42, -42 ) ) ) diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -3,6 +3,11 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1) -> (d1)>