diff --git a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
@@ -74,9 +74,9 @@
 template <typename OpTy>
 static Optional<int64_t> unpackedDim(OpTy xferOp) {
   auto map = xferOp.permutation_map();
-  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>())
+  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
     return expr.getPosition();
-
+  }
   assert(map.getResult(0).template isa<AffineConstantExpr>() &&
          "Expected AffineDimExpr or AffineConstantExpr");
   return None;
@@ -88,8 +88,9 @@
 template <typename OpTy>
 static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
   auto map = xferOp.permutation_map();
-  return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
-                        builder.getContext());
+  return AffineMap::get(
+      map.getNumDims(), 0, map.getResults().drop_front(),
+      builder.getContext());
 }
 
 /// Calculate the indices for the new vector transfer op.
@@ -114,8 +115,8 @@
   }
 }
 
-static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
-                            Value value) {
+static void maybeYieldValue(
+    bool hasRetVal, OpBuilder builder, Location loc, Value value) {
   if (hasRetVal) {
     builder.create<scf::YieldOp>(loc, value);
   } else {
@@ -123,6 +124,20 @@
   }
 }
 
+/// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
+/// is set to true. Returns a null Value if the transfer op is not 1D or if
+/// the transfer op does not have a mask.
+template <typename OpTy>
+static Value maybeGenerateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
+  if (xferOp.getVectorType().getRank() != 1)
+    return Value();
+  if (!xferOp.mask())
+    return Value();
+
+  auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+  return vector_extract_element(xferOp.mask(), ivI32).value;
+}
+
 /// Helper function TransferOpConversion and TransferOp1dConversion.
 /// Generate an in-bounds check if the transfer op may go out-of-bounds on the
 /// specified dimension `dim` with the loop iteration variable `iv`.
@@ -140,6 +155,10 @@
 ///      (out-of-bounds case)
 ///    }
 /// ```
+///
+/// If the transfer is 1D and has a mask, this function generates a more
+/// complex check that also accounts for potentially masked-out elements.
+///
 /// This function variant returns the value returned by `inBoundsCase` or
 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
 /// `resultTypes`.
@@ -150,33 +169,45 @@
     function_ref<Value(OpBuilder &, Location)> inBoundsCase,
     function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
   bool hasRetVal = !resultTypes.empty();
-  bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
+  Value cond; // Condition to be built...
+
+  // Condition check 1: Access in-bounds?
+  bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
   if (!xferOp.isDimInBounds(0) && !isBroadcast) {
     auto memrefDim =
         memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
     using edsc::op::operator+;
     auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
-    auto cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
+    cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
+  }
+
+  // Condition check 2: Masked in?
+  if (auto maskCond = maybeGenerateMaskCheck(builder, xferOp, iv)) {
+    if (cond) {
+      cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
+    } else {
+      cond = maskCond;
+    }
+  }
+
+  // If the condition is non-empty, generate an SCF::IfOp.
+  if (cond) {
     auto check = builder.create<scf::IfOp>(
         xferOp.getLoc(), resultTypes, cond,
-        /*thenBuilder=*/
-        [&](OpBuilder &builder, Location loc) {
-          maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &builder, Location loc) {
-          if (outOfBoundsCase) {
-            maybeYieldValue(hasRetVal, builder, loc,
-                            outOfBoundsCase(builder, loc));
-          } else {
-            builder.create<scf::YieldOp>(loc);
-          }
-        });
+        /*thenBuilder=*/[&](OpBuilder &builder, Location loc) {
+          maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
+        }, /*elseBuilder=*/[&](OpBuilder &builder, Location loc) {
+          if (outOfBoundsCase) {
+            maybeYieldValue(hasRetVal, builder, loc, outOfBoundsCase(builder, loc));
+          } else {
+            builder.create<scf::YieldOp>(loc);
+          }
+        });
     return hasRetVal ? check.getResult(0) : Value();
   }
 
-  // No runtime check needed if dim is guaranteed to be in-bounds.
+  // Condition is empty, no need for an SCF::IfOp.
  return inBoundsCase(builder, xferOp.getLoc());
 }
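
To make the new control flow concrete: for a masked 1-D transfer_read that may
also run out-of-bounds, the guard built above amounts to roughly the following
IR per loop iteration. This is an illustrative sketch, not verbatim pass
output; the SSA names (%A, %base, %mask, %vec) and the vector<9xf32> shape are
invented, and %vec is the loop's padding-initialized iter_arg.

```
%dim = memref.dim %A, %c0 : memref<?xf32>
%idx = addi %base, %iv : index
%in_bounds = cmpi sgt, %dim, %idx : index
%iv_i32 = index_cast %iv : index to i32
%mask_bit = vector.extractelement %mask[%iv_i32 : i32] : vector<9xi1>
%cond = and %in_bounds, %mask_bit : i1
%next = scf.if %cond -> (vector<9xf32>) {
  // In-bounds and masked in: load the element and insert it.
  %val = memref.load %A[%idx] : memref<?xf32>
  %new = vector.insertelement %val, %vec[%iv_i32 : i32] : vector<9xf32>
  scf.yield %new : vector<9xf32>
} else {
  // Out-of-bounds or masked out: keep the padding-initialized vector.
  scf.yield %vec : vector<9xf32>
}
```
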
@@ -189,15 +220,13 @@
     function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
   generateInBoundsCheck(
       xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
-      /*inBoundsCase=*/
-      [&](OpBuilder &builder, Location loc) {
+      /*inBoundsCase=*/[&](OpBuilder &builder, Location loc) {
         inBoundsCase(builder, loc);
         return Value();
       },
-      /*outOfBoundsCase=*/
-      [&](OpBuilder &builder, Location loc) {
+      /*outOfBoundsCase=*/[&](OpBuilder &builder, Location loc) {
         if (outOfBoundsCase)
-          outOfBoundsCase(builder, loc);
+          outOfBoundsCase(builder, loc);
         return Value();
       });
 }
@@ -271,8 +300,8 @@
   ///
   /// Note: The loop and type cast are generated in TransferOpConversion.
   ///       The original TransferReadOp and store op are deleted in `cleanup`.
-  static void rewriteOp(OpBuilder &builder, TransferReadOp xferOp, Value buffer,
-                        Value iv) {
+  static void rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
+                        Value buffer, Value iv) {
     SmallVector<Value, 8> storeIndices;
     getStoreIndices(xferOp, storeIndices);
     storeIndices.push_back(iv);
@@ -283,24 +312,22 @@
     auto bufferType = buffer.getType().dyn_cast<ShapedType>();
     auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
     auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
-    auto newXfer =
-        vector_transfer_read(
-            vecType, xferOp.source(), xferIndices,
-            AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
-            xferOp.padding(), Value(), inBoundsAttr)
-            .value;
+    auto newXfer = vector_transfer_read(
+        vecType, xferOp.source(), xferIndices,
+        AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
+        xferOp.padding(), Value(), inBoundsAttr).value;
 
     if (vecType.getRank() > kTargetRank)
-      newXfer.getDefiningOp()->setAttr(kPassLabel, builder.getUnitAttr());
+      newXfer.getDefiningOp()->setAttr(kPassLabel, builder.getUnitAttr());
 
     memref_store(newXfer, buffer, storeIndices);
   }
 
   /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
   /// padding value to the temporary buffer.
-  static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
-                                   TransferReadOp xferOp, Value buffer,
-                                   Value iv) {
+  static void handleOutOfBoundsDim(
+      OpBuilder &/*builder*/, TransferReadOp xferOp, Value buffer,
+      Value iv) {
     SmallVector<Value, 8> storeIndices;
     getStoreIndices(xferOp, storeIndices);
     storeIndices.push_back(iv);
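
As background for the two strategies above: unpacking one dimension of a
rank-2 read produces a loop of rank-1 reads whose results are stored into a
temporary buffer, which is reinterpreted one unpacked dimension at a time via
vector.type_cast. Roughly, as a hand-written sketch (the names %A, %pad,
%idx0/%idx1 and the 5x6 shape are invented, and the alloca placement is
simplified):

```
%buffer = memref.alloca() : memref<vector<5x6xf32>>
%casted = vector.type_cast %buffer
    : memref<vector<5x6xf32>> to memref<5xvector<6xf32>>
scf.for %iv = %c0 to %c5 step %c1 {
  // In-bounds case: read one 1-D slice and store it into the buffer.
  %slice = vector.transfer_read %A[%idx0, %idx1], %pad
      : memref<?x?xf32>, vector<6xf32>
  memref.store %slice, %casted[%iv] : memref<5xvector<6xf32>>
  // Out-of-bounds case: store a splat of the padding value instead.
}
```
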
@@ -365,16 +392,17 @@
     auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
     auto newXfer = vector_transfer_write(
         Type(), vec, xferOp.source(), xferIndices,
-        AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
-        inBoundsAttr);
+        AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
+        Value(), inBoundsAttr);
 
     if (vecType.getRank() > kTargetRank)
-      newXfer.op->setAttr(kPassLabel, builder.getUnitAttr());
+      newXfer.op->setAttr(kPassLabel, builder.getUnitAttr());
   }
 
   /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
-  static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
-                                   Value buffer, Value iv) {}
+  static void handleOutOfBoundsDim(
+      OpBuilder &builder, TransferWriteOp xferOp, Value buffer,
+      Value iv) {}
 
   /// Cleanup after rewriting the op.
   static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
@@ -522,20 +550,18 @@
     // Generate for loop.
     rewriter.create<scf::ForOp>(
         xferOp.getLoc(), lb, ub, step, ValueRange(),
-        [&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
-          ScopedContext scope(b, loc);
-          generateInBoundsCheck(
-              xferOp, iv, b, unpackedDim(xferOp),
-              /*inBoundsCase=*/
-              [&](OpBuilder &b, Location /*loc*/) {
-                Strategy<OpTy>::rewriteOp(b, xferOp, casted, iv);
-              },
-              /*outOfBoundsCase=*/
-              [&](OpBuilder &b, Location /*loc*/) {
-                Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, casted, iv);
-              });
-          b.create<scf::YieldOp>(loc);
-        });
+        [&](OpBuilder &b, Location loc, Value iv,
+            ValueRange /*loopState*/) {
+          ScopedContext scope(b, loc);
+          generateInBoundsCheck(
+              xferOp, iv, b, unpackedDim(xferOp),
+              /*inBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
+                Strategy<OpTy>::rewriteOp(b, xferOp, casted, iv);
+              }, /*outOfBoundsCase=*/[&](OpBuilder &b, Location /*loc*/) {
+                Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, casted, iv);
+              });
+          b.create<scf::YieldOp>(loc);
+        });
 
     Strategy<OpTy>::cleanup(rewriter, xferOp);
     return success();
@@ -546,9 +572,8 @@
 /// part of TransferOp1dConversion. Return the memref dimension on which
 /// the transfer is operating. A return value of None indicates a broadcast.
 template <typename OpTy>
-static Optional<int64_t>
-get1dMemrefIndices(OpTy xferOp, Value iv,
-                   SmallVector<Value, 8> &memrefIndices) {
+static Optional<int64_t> get1dMemrefIndices(
+    OpTy xferOp, Value iv, SmallVector<Value, 8> &memrefIndices) {
   auto indices = xferOp.indices();
   auto map = xferOp.permutation_map();
 
@@ -575,25 +600,25 @@
 /// Codegen strategy for TransferReadOp.
 template <>
 struct Strategy1d<TransferReadOp> {
-  static void generateForLoopBody(OpBuilder &builder, Location loc,
-                                  TransferReadOp xferOp, Value iv,
-                                  ValueRange loopState) {
+  static void generateForLoopBody(
+      OpBuilder &builder, Location loc, TransferReadOp xferOp, Value iv,
+      ValueRange loopState) {
     SmallVector<Value, 8> indices;
     auto dim = get1dMemrefIndices(xferOp, iv, indices);
-    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+    auto ivI32 = std_index_cast(
+        IntegerType::get(builder.getContext(), 32), iv);
     auto vec = loopState[0];
 
     // In case of out-of-bounds access, leave `vec` as is (was initialized with
     // padding value).
     auto nextVec = generateInBoundsCheck(
         xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
-        /*inBoundsCase=*/
-        [&](OpBuilder & /*b*/, Location loc) {
-          auto val = memref_load(xferOp.source(), indices);
-          return vector_insert_element(val, vec, ivI32.value).value;
-        },
-        /*outOfBoundsCase=*/
-        [&](OpBuilder & /*b*/, Location loc) { return vec; });
+        /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
+          auto val = memref_load(xferOp.source(), indices);
+          return vector_insert_element(val, vec, ivI32.value).value;
+        }, /*outOfBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
+          return vec;
+        });
     builder.create<scf::YieldOp>(loc, nextVec);
   }
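
Putting the read side of Strategy1d together: the 1-D lowering threads the
result vector through the loop's iter_args, so every iteration either inserts
a freshly loaded element or forwards the vector unchanged. Schematically, with
invented names, and with the bounds/mask guard from generateInBoundsCheck
omitted around the body:

```
%init = splat %fm42 : vector<9xf32>
%result = scf.for %iv = %c0 to %c9 step %c1
    iter_args(%vec = %init) -> (vector<9xf32>) {
  %iv_i32 = index_cast %iv : index to i32
  %val = memref.load %A[%idx0, %idx1] : memref<?x?xf32>
  %next = vector.insertelement %val, %vec[%iv_i32 : i32] : vector<9xf32>
  scf.yield %next : vector<9xf32>
}
```
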
@@ -606,24 +631,27 @@
 /// Codegen strategy for TransferWriteOp.
 template <>
 struct Strategy1d<TransferWriteOp> {
-  static void generateForLoopBody(OpBuilder &builder, Location loc,
-                                  TransferWriteOp xferOp, Value iv,
-                                  ValueRange /*loopState*/) {
+  static void generateForLoopBody(
+      OpBuilder &builder, Location loc, TransferWriteOp xferOp, Value iv,
+      ValueRange /*loopState*/) {
     SmallVector<Value, 8> indices;
     auto dim = get1dMemrefIndices(xferOp, iv, indices);
-    auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+    auto ivI32 = std_index_cast(
+        IntegerType::get(builder.getContext(), 32), iv);
 
     // Nothing to do in case of out-of-bounds access.
     generateInBoundsCheck(
         xferOp, iv, builder, dim,
-        /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
-          auto val = vector_extract_element(xferOp.vector(), ivI32.value);
-          memref_store(val, xferOp.source(), indices);
-        });
+        /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
+          auto val = vector_extract_element(xferOp.vector(), ivI32.value);
+          memref_store(val, xferOp.source(), indices);
+        });
     builder.create<scf::YieldOp>(loc);
   }
 
-  static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
+  static Value initialLoopState(TransferWriteOp xferOp) {
+    return Value();
+  }
 };
 
 /// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
@@ -667,11 +695,9 @@
     auto map = xferOp.permutation_map();
 
     if (xferOp.getVectorType().getRank() != 1)
-      return failure();
-    if (map.isMinorIdentity()) // Handled by ConvertVectorToLLVM
-      return failure();
-    if (xferOp.mask())
-      return failure();
+      return failure();
+    if (map.isMinorIdentity()) // Handled by ConvertVectorToLLVM
+      return failure();
 
     // Loop bounds, step, state...
     auto vecType = xferOp.getVectorType();
@@ -684,10 +710,10 @@
     rewriter.replaceOpWithNewOp<scf::ForOp>(
         xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
         [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
-          ScopedContext nestedScope(builder, loc);
-          Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
-                                                loopState);
-        });
+          ScopedContext nestedScope(builder, loc);
+          Strategy1d<OpTy>::generateForLoopBody(
+              builder, loc, xferOp, iv, loopState);
+        });
 
     return success();
   }
@@ -699,7 +725,8 @@
 
 void populateProgressiveVectorToSCFConversionPatterns(
     RewritePatternSet &patterns) {
-  patterns.add<PrepareTransferReadConversion, PrepareTransferWriteConversion,
-               TransferOpConversion<TransferReadOp>,
-               TransferOpConversion<TransferWriteOp>>(patterns.getContext());
+  patterns.add<
+      PrepareTransferReadConversion, PrepareTransferWriteConversion,
+      TransferOpConversion<TransferReadOp>,
+      TransferOpConversion<TransferWriteOp>>(patterns.getContext());
 
@@ -725,4 +752,3 @@
 mlir::createProgressiveConvertVectorToSCFPass() {
   return std::make_unique<ProgressiveConvertVectorToSCFPass>();
 }
-
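
The write side is symmetric but stateless: no value flows between iterations,
so the loop has no iter_args, and out-of-bounds or masked-out iterations
simply do nothing. Schematically, again as a sketch with invented names (the
vector<7xf32> shape matches the test below):

```
scf.for %iv = %c0 to %c7 step %c1 {
  %iv_i32 = index_cast %iv : index to i32
  %val = vector.extractelement %vf0[%iv_i32 : i32] : vector<7xf32>
  // Guarded by the same bounds/mask scf.if as on the read side.
  memref.store %val, %A[%idx0, %idx1] : memref<?x?xf32>
}
```
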
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -1,8 +1,3 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
-// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
-// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
 // RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
@@ -10,34 +5,57 @@
 
 // Test for special cases of 1D vector transfer ops.
 
-func @transfer_read_2d(%A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+func @transfer_read_1d(%A : memref<?x?xf32>, %base1 : index, %base2 : index) {
   %fm42 = constant -42.0: f32
   %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
-      : memref<?x?xf32>, vector<5x6xf32>
-  vector.print %f: vector<5x6xf32>
+      {permutation_map = affine_map<(d0, d1) -> (d0)>}
+      : memref<?x?xf32>, vector<9xf32>
+  vector.print %f: vector<9xf32>
   return
 }
 
-func @transfer_read_1d(%A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+func @transfer_read_1d_broadcast(
+    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
   %fm42 = constant -42.0: f32
   %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {permutation_map = affine_map<(d0, d1) -> (d0)>}
+      {permutation_map = affine_map<(d0, d1) -> (0)>}
       : memref<?x?xf32>, vector<9xf32>
   vector.print %f: vector<9xf32>
   return
 }
 
-func @transfer_read_1d_broadcast(
+func @transfer_read_1d_in_bounds(
     %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
   %fm42 = constant -42.0: f32
   %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {permutation_map = affine_map<(d0, d1) -> (0)>}
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
+      : memref<?x?xf32>, vector<3xf32>
+  vector.print %f: vector<3xf32>
+  return
+}
+
+func @transfer_read_1d_mask(
+    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+  %fm42 = constant -42.0: f32
+  %mask = constant dense<[1, 0, 1, 0, 1, 1, 1, 0, 1]> : vector<9xi1>
+  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
+      {permutation_map = affine_map<(d0, d1) -> (d0)>}
       : memref<?x?xf32>, vector<9xf32>
   vector.print %f: vector<9xf32>
   return
 }
 
+func @transfer_read_1d_mask_in_bounds(
+    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+  %fm42 = constant -42.0: f32
+  %mask = constant dense<[1, 0, 1]> : vector<3xi1>
+  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
+      : memref<?x?xf32>, vector<3xf32>
+  vector.print %f: vector<3xf32>
+  return
+}
+
 func @transfer_write_1d(%A : memref<?x?xf32>, %base1 : index, %base2 : index) {
   %fn1 = constant -1.0 : f32
   %vf0 = splat %fn1 : vector<7xf32>
@@ -69,14 +87,35 @@
   }
 
+  // Read from 2D memref on first dimension. Cannot be lowered to an LLVM
+  // vector load. Instead, generates scalar loads.
   call @transfer_read_1d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> ()
+  // Write to 2D memref on first dimension. Cannot be lowered to an LLVM
+  // vector store. Instead, generates scalar stores.
   call @transfer_write_1d(%A, %c3, %c2) : (memref<?x?xf32>, index, index) -> ()
+  // (Same as above.)
   call @transfer_read_1d(%A, %c0, %c2) : (memref<?x?xf32>, index, index) -> ()
+  // Read a scalar from a 2D memref and broadcast the value to a 1D vector.
+  // Generates a loop with vector.insertelement.
   call @transfer_read_1d_broadcast(%A, %c1, %c2)
       : (memref<?x?xf32>, index, index) -> ()
+  // Read from 2D memref on first dimension. Accesses are in-bounds, so no
+  // if-check is generated inside the generated loop.
+  call @transfer_read_1d_in_bounds(%A, %c1, %c2)
+      : (memref<?x?xf32>, index, index) -> ()
+  // The optional mask attribute is specified and, in addition, there may be
+  // out-of-bounds accesses.
+  call @transfer_read_1d_mask(%A, %c1, %c2)
+      : (memref<?x?xf32>, index, index) -> ()
+  // Same as above, but all accesses are in-bounds.
+  call @transfer_read_1d_mask_in_bounds(%A, %c1, %c2)
+      : (memref<?x?xf32>, index, index) -> ()
   return
 }
 
 // CHECK: ( 12, 22, 32, 42, -42, -42, -42, -42, -42 )
 // CHECK: ( 2, 12, 22, -1, -1, -42, -42, -42, -42 )
 // CHECK: ( 12, 12, 12, 12, 12, 12, 12, 12, 12 )
+// CHECK: ( 12, 22, -1 )
+// CHECK: ( 12, -42, -1, -42, -42, -42, -42, -42, -42 )
+// CHECK: ( 12, -42, -1 )
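
A note on how the new masked CHECK lines follow from the data: by the time
@transfer_read_1d_mask runs, column 2 holds 2, 12, 22, -1, -1 in rows 0-4 (the
-1s were written by @transfer_write_1d, see the second CHECK line). The masked
read starts at row 1, so its unmasked result would be
( 12, 22, -1, -1, -42, -42, -42, -42, -42 ), with -42 padding for the
out-of-bounds rows 5-9. Lanes 1, 3, and 7 are masked out and therefore also
produce the padding value, giving ( 12, -42, -1, -42, -42, -42, -42, -42, -42 ).
The in-bounds variant reads only rows 1-3 with mask [1, 0, 1], hence
( 12, -42, -1 ).
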