diff --git a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h
--- a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h
+++ b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h
@@ -14,8 +14,8 @@
 class OwningRewritePatternList;
 
 /// Collect a set of patterns to convert from the Vector dialect to loops + std.
-void populateVectorToAffineLoopsConversionPatterns(
-    MLIRContext *context, OwningRewritePatternList &patterns);
+void populateVectorToLoopsConversionPatterns(OwningRewritePatternList &patterns,
+                                             MLIRContext *context);
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h
--- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h
@@ -68,6 +68,7 @@
 class VectorBoundsCapture : public BoundsCapture {
 public:
   explicit VectorBoundsCapture(Value v);
+  explicit VectorBoundsCapture(VectorType t);
   VectorBoundsCapture(const VectorBoundsCapture &) = default;
   VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default;
diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h
--- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h
@@ -14,8 +14,10 @@
 namespace edsc {
 namespace intrinsics {
 
+using std_addi = ValueBuilder<AddIOp>;
 using std_addf = ValueBuilder<AddFOp>;
 using std_alloc = ValueBuilder<AllocOp>;
+using std_alloca = ValueBuilder<AllocaOp>;
 using std_call = OperationBuilder<CallOp>;
 using std_constant = ValueBuilder<ConstantOp>;
 using std_constant_float = ValueBuilder<ConstantFloatOp>;
@@ -31,6 +33,7 @@
 using std_ret = OperationBuilder<ReturnOp>;
 using std_select = ValueBuilder<SelectOp>;
 using std_load = ValueBuilder<LoadOp>;
+using std_splat = ValueBuilder<SplatOp>;
 using std_store = OperationBuilder<StoreOp>;
 using std_subi = ValueBuilder<SubIOp>;
 using std_sub_view = ValueBuilder<SubViewOp>;
diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h
--- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h
+++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h
@@ -18,6 +18,8 @@
 using vector_contract = ValueBuilder<vector::ContractionOp>;
 using vector_matmul = ValueBuilder<vector::MatmulOp>;
 using vector_print = OperationBuilder<vector::PrintOp>;
+using vector_transfer_read = ValueBuilder<vector::TransferReadOp>;
+using vector_transfer_write = OperationBuilder<vector::TransferWriteOp>;
 using vector_type_cast = ValueBuilder<vector::TypeCastOp>;
 
 } // namespace intrinsics
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -1131,7 +1131,7 @@
 def Vector_TypeCastOp :
   Vector_Op<"type_cast", [NoSideEffect]>,
     Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
-    Results<(outs AnyMemRef)> {
+    Results<(outs AnyMemRef:$result)> {
   let summary = "type_cast op converts a scalar memref to a vector memref";
   let description = [{
     Performs a conversion from a memref with scalar element to a memref with a
@@ -1154,13 +1154,11 @@
     ```
   }];
 
+  /// Build the canonical memRefType with a single vector.
+  /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>.
   let builders = [OpBuilder<
     "OpBuilder &builder, OperationState &result, Value source">];
 
-  let parser = [{
-    return impl::parseCastOp(parser, result);
-  }];
-
   let extraClassDeclaration = [{
     MemRefType getMemRefType() {
       return memref().getType().cast<MemRefType>();
@@ -1169,6 +1167,10 @@
       return getResult().getType().cast<MemRefType>();
     }
   }];
+
+  let assemblyFormat = [{
+    $memref attr-dict `:` type($memref) `to` type($result)
+  }];
 }
 
 def Vector_ConstantMaskOp :
diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
--- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
@@ -16,7 +16,7 @@
 class OwningRewritePatternList;
 
 /// Collect a set of patterns to convert from the Vector dialect to itself.
-/// Should be merged with populateVectorToAffineLoopsConversionPatterns.
+/// Should be merged with populateVectorToLoopsLoweringPattern.
 void populateVectorToVectorConversionPatterns(
     MLIRContext *context, OwningRewritePatternList &patterns,
     ArrayRef<int64_t> coarseVectorShape = {},
diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h
--- a/mlir/include/mlir/EDSC/Builders.h
+++ b/mlir/include/mlir/EDSC/Builders.h
@@ -120,13 +120,16 @@
   /// scoping itself, we use enter/exit pairs of operations.
   /// As a consequence we must allocate a new OpBuilder + ScopedContext and
   /// let the escape.
-  /// Step back "prev" times from the end of the block to set up the insertion
-  /// point, which is useful for non-empty blocks.
-  void enter(mlir::Block *block, int prev = 0) {
+  void enter(mlir::Block *block) {
     bodyScope = new ScopedContext(
         ScopedContext::getBuilder(),
-        OpBuilder::InsertPoint(block, std::prev(block->end(), prev)),
+        OpBuilder::InsertPoint(block, std::prev(block->end())),
         ScopedContext::getLocation());
+    if (!block->empty()) {
+      auto &termOp = block->back();
+      if (termOp.isKnownTerminator())
+        ScopedContext::getBuilder().setInsertionPoint(&termOp);
+    }
     bodyScope->nestedBuilder = this;
   }
@@ -199,7 +202,8 @@
 class BlockBuilder : public NestedBuilder {
 public:
   /// Enters the mlir::Block* previously captured by `bh` and sets the insertion
-  /// point to its end.
+  /// point to its end. If the block already contains a terminator, set the
+  /// insertion point before the terminator.
   BlockBuilder(BlockHandle bh, Append);
 
   /// Constructs a new mlir::Block with argument types derived from `args`.
diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h
--- a/mlir/include/mlir/IR/AffineMap.h
+++ b/mlir/include/mlir/IR/AffineMap.h
@@ -66,6 +66,11 @@
   static AffineMap getMultiDimIdentityMap(unsigned numDims,
                                           MLIRContext *context);
 
+  /// Returns an identity affine map (d0, ..., dn) -> (dp, ..., dn) on the most
+  /// minor dimensions.
+  static AffineMap getMinorIdentityMap(unsigned dims, unsigned results,
+                                       MLIRContext *context);
+
   /// Returns an AffineMap representing a permutation.
   /// The permutation is expressed as a non-empty vector of integers.
   /// E.g. the permutation `(i,j,k) -> (j,k,i)` will be expressed with
@@ -94,6 +99,10 @@
   /// dimensional identifiers.
   bool isIdentity() const;
 
+  /// Returns true if the map is a minor identity map, i.e. an identity affine
+  /// map (d0, ..., dn) -> (dp, ..., dn) on the most minor dimensions.
+  static bool isMinorIdentity(AffineMap map);
+
   /// Returns true if this affine map is an empty map, i.e., () -> ().
   bool isEmpty() const;
diff --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
--- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -18,6 +18,7 @@
     MLIRLoopToStandard
     MLIRStandardToLLVM
     MLIRVectorToLLVM
+    MLIRVectorToLoops
     MLIRTransforms
     LLVMCore
     LLVMSupport
diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
--- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
+++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
+#include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
@@ -575,6 +576,7 @@
   populateAffineToStdConversionPatterns(patterns, &getContext());
   populateLoopToStdConversionPatterns(patterns, &getContext());
   populateStdToLLVMConversionPatterns(converter, patterns);
+  populateVectorToLoopsConversionPatterns(patterns, &getContext());
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
   populateVectorToLLVMConversionPatterns(converter, patterns);
   populateLinalgToStandardConversionPatterns(patterns, &getContext());
diff --git a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
--- a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
+++ b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h"
 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
 #include "mlir/Dialect/LoopOps/EDSC/Builders.h"
+#include "mlir/Dialect/LoopOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
@@ -34,6 +35,223 @@
 using vector::TransferReadOp;
 using vector::TransferWriteOp;
 
+/// Helper class captures the common information needed to lower N>1-D vector
+/// transfer operations (read and write).
+/// On construction, this class opens an edsc::ScopedContext for simpler IR
+/// manipulation.
+/// In pseudo-IR, for an n-D vector_transfer_read such as:
+///
+/// ```
+///   vector_transfer_read(%m, %offsets, identity_map, %fill) :
+///     memref<(leading_dims) x (major_dims) x (minor_dims) x type>,
+///     vector<(major_dims) x (minor_dims) x type>
+/// ```
+///
+/// where rank(minor_dims) is the lower-level vector rank (e.g. 1 for LLVM or
+/// higher).
+///
+/// This is the entry point to emitting pseudo-IR resembling:
+///
+/// ```
+///   %tmp = alloc(): memref<(major_dims) x vector<minor_dims x type>>
+///   for (%ivs_major, {0}, {vector_shape}, {1}) { // (N-1)-D loop nest
+///     if (any_of(%ivs_major + %offsets, <, major_dims)) {
+///       %v = vector_transfer_read(
+///         {%offsets_leading, %ivs_major + %offsets_major, %offsets_minor},
+///          %ivs_minor):
+///         memref<(leading_dims) x (major_dims) x (minor_dims) x type>,
+///         vector<(minor_dims) x type>;
+///       store(%v, %tmp);
+///     } else {
+///       %v = splat(vector<(minor_dims) x type>, %fill)
+///       store(%v, %tmp, %ivs_major);
+///     }
+///   }
+///   %res = load(%tmp, %0): memref<(major_dims) x vector<minor_dims x type>>):
+//      vector<(major_dims) x (minor_dims) x type>
+/// ```
+///
+template <typename ConcreteOp>
+class NDTransferOpHelper {
+public:
+  NDTransferOpHelper(PatternRewriter &rewriter, ConcreteOp xferOp)
+      : rewriter(rewriter), loc(xferOp.getLoc()),
+        scope(std::make_unique<ScopedContext>(rewriter, loc)), xferOp(xferOp),
+        op(xferOp.getOperation()) {
+    vectorType = xferOp.getVectorType();
+    // TODO(ntv, ajcbik): when we go to k > 1-D vectors adapt minorRank.
+    minorRank = 1;
+    majorRank = vectorType.getRank() - minorRank;
+    leadingRank = xferOp.getMemRefType().getRank() - (majorRank + minorRank);
+    majorVectorType =
+        VectorType::get(vectorType.getShape().take_front(majorRank),
+                        vectorType.getElementType());
+    minorVectorType =
+        VectorType::get(vectorType.getShape().take_back(minorRank),
+                        vectorType.getElementType());
+    /// Memref of minor vector type is used for individual transfers.
+    memRefMinorVectorType =
+        MemRefType::get(majorVectorType.getShape(), minorVectorType, {},
+                        xferOp.getMemRefType().getMemorySpace());
+  }
+
+  LogicalResult doReplace();
+
+private:
+  /// Creates the loop nest on the "major" dimensions and calls the
+  /// `loopBodyBuilder` lambda in the context of the loop nest.
+  template <typename Lambda>
+  void emitLoops(Lambda loopBodyBuilder);
+
+  /// Operate within the body of `emitLoops` to:
+  ///   1. Compute the indexings `majorIvs + majorOffsets`.
+  ///   2. Compute a boolean that determines whether the first `majorIvs.rank()`
+  ///      dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
+  ///   3. Create an IfOp conditioned on the boolean in step 2.
+  ///   4. Call a `thenBlockBuilder` and an `elseBlockBuilder` to append
+  ///      operations to the IfOp blocks as appropriate.
+  template <typename LambdaThen, typename LambdaElse>
+  void emitInBounds(ValueRange majorIvs, ValueRange majorOffsets,
+                    MemRefBoundsCapture &memrefBounds,
+                    LambdaThen thenBlockBuilder, LambdaElse elseBlockBuilder);
+
+  /// Common state to lower vector transfer ops.
+  PatternRewriter &rewriter;
+  Location loc;
+  std::unique_ptr<ScopedContext> scope;
+  ConcreteOp xferOp;
+  Operation *op;
+  // A vector transfer copies data between:
+  //   - memref<(leading_dims) x (major_dims) x (minor_dims) x type>
+  //   - vector<(major_dims) x (minor_dims) x type>
+  unsigned minorRank;         // for now always 1
+  unsigned majorRank;         // vector rank - minorRank
+  unsigned leadingRank;       // memref rank - vector rank
+  VectorType vectorType;      // vector<(major_dims) x (minor_dims) x type>
+  VectorType majorVectorType; // vector<(major_dims) x type>
+  VectorType minorVectorType; // vector<(minor_dims) x type>
+  MemRefType memRefMinorVectorType; // memref<major_dims x vector<minor_dims x type>>
+};
+
+template <typename ConcreteOp>
+template <typename Lambda>
+void NDTransferOpHelper<ConcreteOp>::emitLoops(Lambda loopBodyBuilder) {
+  /// Loop nest operates on the major dimensions
+  MemRefBoundsCapture memrefBoundsCapture(xferOp.memref());
+  VectorBoundsCapture vectorBoundsCapture(majorVectorType);
+  auto majorLbs = vectorBoundsCapture.getLbs();
+  auto majorUbs = vectorBoundsCapture.getUbs();
+  auto majorSteps = vectorBoundsCapture.getSteps();
+  SmallVector<Value, 8> majorIvs(vectorBoundsCapture.rank());
+  AffineLoopNestBuilder(majorIvs, majorLbs, majorUbs, majorSteps)([&] {
+    ValueRange indices(xferOp.indices());
+    loopBodyBuilder(majorIvs, indices.take_front(leadingRank),
+                    indices.drop_front(leadingRank).take_front(majorRank),
+                    indices.take_back(minorRank), memrefBoundsCapture);
+  });
+}
+
+template <typename ConcreteOp>
+template <typename LambdaThen, typename LambdaElse>
+void NDTransferOpHelper<ConcreteOp>::emitInBounds(
+    ValueRange majorIvs, ValueRange majorOffsets,
+    MemRefBoundsCapture &memrefBounds, LambdaThen thenBlockBuilder,
+    LambdaElse elseBlockBuilder) {
+  Value inBounds = std_constant_int(/*value=*/1, /*width=*/1);
+  SmallVector<Value, 8> majorIvsPlusOffsets;
+  majorIvsPlusOffsets.reserve(majorIvs.size());
+  for (auto it : llvm::zip(majorIvs, majorOffsets, memrefBounds.getUbs())) {
+    Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it);
+    using namespace mlir::edsc::op;
+    majorIvsPlusOffsets.push_back(iv + off);
+    Value inBounds2 = majorIvsPlusOffsets.back() < ub;
+    inBounds = inBounds && inBounds2;
+  }
+
+  auto ifOp = ScopedContext::getBuilder().create<loop::IfOp>(
+      ScopedContext::getLocation(), TypeRange{}, inBounds,
+      /*withElseRegion=*/std::is_same<ConcreteOp, TransferReadOp>());
+  BlockBuilder(&ifOp.thenRegion().front(),
+               Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); });
+  if (std::is_same<ConcreteOp, TransferReadOp>())
+    BlockBuilder(&ifOp.elseRegion().front(),
+                 Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); });
+}
+
+template <>
+LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
+  Value alloc = std_alloc(memRefMinorVectorType);
+
+  emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
+                ValueRange majorOffsets, ValueRange minorOffsets,
+                MemRefBoundsCapture &memrefBounds) {
+    // If in-bounds, index into memref and lower to 1-D transfer read.
+    auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {
+      auto map = AffineMap::getMinorIdentityMap(
+          xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext());
+      // Lower to 1-D vector_transfer_read and let recursion handle it.
+      Value memref = xferOp.memref();
+      SmallVector<Value, 8> indexing;
+      indexing.reserve(leadingRank + majorRank + minorRank);
+      indexing.append(leadingOffsets.begin(), leadingOffsets.end());
+      indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end());
+      indexing.append(minorOffsets.begin(), minorOffsets.end());
+      auto loaded1D =
+          vector_transfer_read(minorVectorType, memref, indexing,
+                               AffineMapAttr::get(map), xferOp.padding());
+      // Store the 1-D vector.
+      std_store(loaded1D, alloc, majorIvs);
+    };
+    // If out-of-bounds, just store a splatted vector.
+    auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {
+      auto vector = std_splat(minorVectorType, xferOp.padding());
+      std_store(vector, alloc, majorIvs);
+    };
+    emitInBounds(majorIvs, majorOffsets, memrefBounds, thenBlockBuilder,
+                 elseBlockBuilder);
+  });
+
+  Value loaded =
+      std_load(vector_type_cast(MemRefType::get({}, vectorType), alloc));
+  rewriter.replaceOp(op, loaded);
+
+  return success();
+}
+
+template <>
+LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
+  Value alloc = std_alloc(memRefMinorVectorType);
+
+  std_store(xferOp.vector(),
+            vector_type_cast(MemRefType::get({}, vectorType), alloc));
+
+  emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
+                ValueRange majorOffsets, ValueRange minorOffsets,
+                MemRefBoundsCapture &memrefBounds) {
+    auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {
+      // Lower to 1-D vector_transfer_write and let recursion handle it.
+      Value loaded1D = std_load(alloc, majorIvs);
+      auto map = AffineMap::getMinorIdentityMap(
+          xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext());
+      SmallVector<Value, 8> indexing;
+      indexing.reserve(leadingRank + majorRank + minorRank);
+      indexing.append(leadingOffsets.begin(), leadingOffsets.end());
+      indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end());
+      indexing.append(minorOffsets.begin(), minorOffsets.end());
+      vector_transfer_write(loaded1D, xferOp.memref(), indexing,
+                            AffineMapAttr::get(map));
+    };
+    // Don't write anything when out of bounds.
+    auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {};
+    emitInBounds(majorIvs, majorOffsets, memrefBounds, thenBlockBuilder,
+                 elseBlockBuilder);
+  });
+
+  rewriter.eraseOp(op);
+
+  return success();
+}
+
 /// Analyzes the `transfer` to find an access dimension along the fastest remote
 /// MemRef dimension. If such a dimension with coalescing properties is found,
 /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of
@@ -243,7 +461,16 @@
   using namespace mlir::edsc::op;
 
   TransferReadOp transfer = cast<TransferReadOp>(op);
+  if (AffineMap::isMinorIdentity(transfer.permutation_map())) {
+    // If > 1D, emit a bunch of loops around 1-D vector transfers.
+    if (transfer.getVectorType().getRank() > 1)
+      return NDTransferOpHelper<TransferReadOp>(rewriter, transfer).doReplace();
+    // If 1-D this is now handled by the target-specific lowering.
+    if (transfer.getVectorType().getRank() == 1)
+      return failure();
+  }
 
+  // Conservative lowering to scalar load / stores.
   // 1. Setup all the captures.
   ScopedContext scope(rewriter, transfer.getLoc());
   StdIndexedValue remote(transfer.memref());
@@ -306,6 +533,15 @@
   using namespace edsc::op;
 
   TransferWriteOp transfer = cast<TransferWriteOp>(op);
+  if (AffineMap::isMinorIdentity(transfer.permutation_map())) {
+    // If > 1D, emit a bunch of loops around 1-D vector transfers.
+    if (transfer.getVectorType().getRank() > 1)
+      return NDTransferOpHelper<TransferWriteOp>(rewriter, transfer)
+          .doReplace();
+    // If 1-D this is now handled by the target-specific lowering.
+    if (transfer.getVectorType().getRank() == 1)
+      return failure();
+  }
   // 1. Setup all the captures.
   ScopedContext scope(rewriter, transfer.getLoc());
@@ -347,8 +583,8 @@
 
 } // namespace
 
-void mlir::populateVectorToAffineLoopsConversionPatterns(
-    MLIRContext *context, OwningRewritePatternList &patterns) {
+void mlir::populateVectorToLoopsConversionPatterns(
+    OwningRewritePatternList &patterns, MLIRContext *context) {
   patterns.insert<VectorTransferRewriter<vector::TransferReadOp>,
                   VectorTransferRewriter<vector::TransferWriteOp>>(context);
 }
diff --git a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp
@@ -50,7 +50,7 @@
   }
   auto *body = getForInductionVarOwner(*iv).getBody();
-  result.enter(body, /*prev=*/1);
+  result.enter(body);
   return result;
 }
 
diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
@@ -31,7 +31,7 @@
   ForOp forOp = OperationBuilder<ForOp>(lb, ub, step);
   *iv = forOp.getInductionVar();
   auto *body = forOp.getBody();
-  enter(body, /*prev=*/1);
+  enter(body);
 }
 
 mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(Value *iv,
@@ -39,7 +39,7 @@
   ForOp forOp = OperationBuilder<ForOp>(range.offset, range.size, range.stride);
   *iv = forOp.getInductionVar();
   auto *body = forOp.getBody();
-  enter(body, /*prev=*/1);
+  enter(body);
 }
 
 Value mlir::edsc::LoopRangeBuilder::operator()(std::function<void(void)> fun) {
diff --git a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp
@@ -89,7 +89,7 @@
   for (size_t i = 0, e = ivs.size(); i < e; ++i)
     ivs[i] = parallelOp.getBody()->getArgument(i);
   LoopBuilder result;
-  result.enter(parallelOp.getBody(), /*prev=*/1);
+  result.enter(parallelOp.getBody());
   return result;
 }
 
@@ -107,6 +107,6 @@
     iterArgsHandles[i] = body->getArgument(i + 1);
   }
   result.setOp(forOp);
-  result.enter(body, /*prev=*/1);
+  result.enter(body);
   return result;
 }
diff --git a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp
@@ -30,7 +30,7 @@
   return res;
 }
 
-mlir::edsc::MemRefBoundsCapture::MemRefBoundsCapture(Value v) : base(v) {
+mlir::edsc::MemRefBoundsCapture::MemRefBoundsCapture(Value v) {
   auto memrefSizeValues = getMemRefSizes(v);
   for (auto s : memrefSizeValues) {
     lbs.push_back(std_constant_index(0));
@@ -39,11 +39,13 @@
   }
 }
 
-mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(Value v) : base(v) {
-  auto vectorType = v.getType().cast<VectorType>();
-  for (auto s : vectorType.getShape()) {
+mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(VectorType t) {
+  for (auto s : t.getShape()) {
     lbs.push_back(std_constant_index(0));
     ubs.push_back(std_constant_index(s));
     steps.push_back(1);
   }
 }
+
+mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(Value v)
+    : VectorBoundsCapture(v.getType().cast<VectorType>()) {}
diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp
--- a/mlir/lib/Dialect/Vector/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/VectorOps.cpp
@@ -1,4 +1,4 @@
-//===- VectorOps.cpp - MLIR Super Vectorizer Operations -------------------===//
+//===- VectorOps.cpp - MLIR Vector Dialect Operations ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -44,8 +44,8 @@
 /// Materialize a single constant operation from a given attribute value with
 /// the desired resultant type.
 Operation *VectorDialect::materializeConstant(OpBuilder &builder,
-                                              Attribute value, Type type,
-                                              Location loc) {
+                                               Attribute value, Type type,
+                                               Location loc) {
   return builder.create<ConstantOp>(loc, type, value);
 }
 
@@ -1462,31 +1462,52 @@
 // TypeCastOp
 //===----------------------------------------------------------------------===//
 
-static MemRefType inferVectorTypeCastResultType(MemRefType t) {
-  return MemRefType::get({}, VectorType::get(t.getShape(), t.getElementType()));
+static SmallVector<int64_t, 8> extractShape(MemRefType memRefType) {
+  auto vectorType = memRefType.getElementType().dyn_cast<VectorType>();
+  SmallVector<int64_t, 8> res(memRefType.getShape().begin(),
+                              memRefType.getShape().end());
+  if (vectorType) {
+    res.reserve(memRefType.getRank() + vectorType.getRank());
+    for (auto s : vectorType.getShape())
+      res.push_back(s);
+  }
+  return res;
 }
 
+/// Build the canonical memRefType with a single vector.
+/// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>.
 void TypeCastOp::build(OpBuilder &builder, OperationState &result,
                        Value source) {
   result.addOperands(source);
+  MemRefType memRefType = source.getType().cast<MemRefType>();
+  VectorType vectorType =
+      VectorType::get(extractShape(memRefType),
+                      getElementTypeOrSelf(getElementTypeOrSelf(memRefType)));
   result.addTypes(
-      inferVectorTypeCastResultType(source.getType().cast<MemRefType>()));
-}
-
-static void print(OpAsmPrinter &p, TypeCastOp op) {
-  auto type = op.getOperand().getType().cast<MemRefType>();
-  p << op.getOperationName() << ' ' << op.memref() << " : " << type << " to "
-    << inferVectorTypeCastResultType(type);
+      MemRefType::get({}, vectorType, {}, memRefType.getMemorySpace()));
 }
 
 static LogicalResult verify(TypeCastOp op) {
   MemRefType canonicalType = canonicalizeStridedLayout(op.getMemRefType());
   if (!canonicalType.getAffineMaps().empty())
     return op.emitOpError("expects operand to be a memref with no layout");
-
-  auto resultType = inferVectorTypeCastResultType(op.getMemRefType());
-  if (op.getResultMemRefType() != resultType)
-    return op.emitOpError("expects result type to be: ") << resultType;
+  if (!op.getResultMemRefType().getAffineMaps().empty())
+    return op.emitOpError("expects result to be a memref with no layout");
+  if (op.getResultMemRefType().getMemorySpace() !=
+      op.getMemRefType().getMemorySpace())
+    return op.emitOpError("expects result in same memory space");
+
+  auto sourceType = op.getMemRefType();
+  auto resultType = op.getResultMemRefType();
+  if (getElementTypeOrSelf(getElementTypeOrSelf(sourceType)) !=
+      getElementTypeOrSelf(getElementTypeOrSelf(resultType)))
+    return op.emitOpError(
+               "expects result and operand with same underlying scalar type: ")
+           << resultType;
+  if (extractShape(sourceType) != extractShape(resultType))
+    return op.emitOpError(
+               "expects concatenated result and operand shapes to be equal: ")
+           << resultType;
   return success();
 }
 
diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp
--- a/mlir/lib/IR/AffineMap.cpp
+++ b/mlir/lib/IR/AffineMap.cpp
@@ -95,6 +95,22 @@
                        {getAffineConstantExpr(val, context)});
 }
 
+/// Returns an identity affine map (d0, ..., dn) -> (dp, ..., dn) on the most
+/// minor dimensions.
+AffineMap AffineMap::getMinorIdentityMap(unsigned dims, unsigned results,
+                                         MLIRContext *context) {
+  assert(dims >= results && "Dimension mismatch");
+  auto id = AffineMap::getMultiDimIdentityMap(dims, context);
+  return AffineMap::get(dims, 0, id.getResults().take_back(results), context);
+}
+
+bool AffineMap::isMinorIdentity(AffineMap map) {
+  if (!map)
+    return false;
+  return map == getMinorIdentityMap(map.getNumDims(), map.getNumResults(),
+                                    map.getContext());
+};
+
 /// Returns an AffineMap representing a permutation.
 AffineMap AffineMap::getPermutationMap(ArrayRef<unsigned> permutation,
                                        MLIRContext *context) {
diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
--- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir
@@ -1,7 +1,4 @@
-// RUN: mlir-opt %s -test-convert-vector-to-loops | FileCheck %s
-
-// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
-// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
+// RUN: mlir-opt %s -test-convert-vector-to-loops -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @materialize_read_1d() {
 func @materialize_read_1d() {
@@ -27,6 +24,8 @@
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @materialize_read_1d_partially_specialized
 func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
   %f0 = constant 0.0: f32
@@ -50,6 +49,11 @@
   return
 }
 
+// -----
+
+// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
+
 // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
   %f0 = constant 0.0: f32
@@ -122,6 +126,11 @@
   return
 }
 
+// -----
+
+// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)>
+
 // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
 func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
   // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
@@ -198,3 +207,67 @@
   }
   return
 }
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
+
+// CHECK-LABEL: transfer_read_progressive(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
+// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index
+func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<17x15xf32> {
+  // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32
+  %f7 = constant 7.0: f32
+
+  // CHECK-DAG: %[[cond0:.*]] = constant 1 : i1
+  // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
+  // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>>
+  // CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
+  // CHECK: affine.for %[[I:.*]] = 0 to 17 {
+  // CHECK:   %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
+  // CHECK:   %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
+  // CHECK:   %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1
+  // CHECK:   loop.if %[[cond1]] {
+  // CHECK:     %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] {permutation_map = #[[MAP1]]} : memref<?x?xf32>, vector<15xf32>
+  // CHECK:     store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>>
+  // CHECK:   } else {
+  // CHECK:     store %[[splat]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>>
+  // CHECK:   }
+  // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref<vector<17x15xf32>>
+  // CHECK: %[[cst:.*]] = load %[[vmemref]][] : memref<vector<17x15xf32>>
+  %f = vector.transfer_read %A[%base, %base], %f7
+      {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :
+    memref<?x?xf32>, vector<17x15xf32>
+
+  return %f: vector<17x15xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
+
+// CHECK-LABEL: transfer_write_progressive(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
+// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
+// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32>
+func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<17x15xf32>) {
+  // CHECK: %[[cond0:.*]] = constant 1 : i1
+  // CHECK: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>>
+  // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref<vector<17x15xf32>>
+  // CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<17x15xf32>>
+  // CHECK: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
+  // CHECK: affine.for %[[I:.*]] = 0 to 17 {
+  // CHECK:   %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
+  // CHECK:   %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index
+  // CHECK:   %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1
+  // CHECK:   loop.if %[[cond1]] {
+  // CHECK:     %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>>
+  // CHECK:     vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref<?x?xf32>
+  // CHECK:   }
+  vector.transfer_write %vec, %A[%base, %base]
+    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} :
+    vector<17x15xf32>, memref<?x?xf32>
+  return
+}
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -182,6 +182,8 @@
   OpBuilder builder(f.getBody());
   ScopedContext scope(builder, f.getLoc());
   Value c1(std_constant_int(42, 32)), c2(std_constant_int(1234, 32));
+  ReturnOp ret = std_ret();
+
   Value r;
   Value args12[2];
   Value &arg1 = args12[0], &arg2 = args12[1];
@@ -204,6 +206,7 @@
   });
   // Get back to entry block and add a branch into b1
   BlockBuilder(functionBlock, Append())([&] { std_br(b1, {c1, c2}); });
+  ret.erase();
 
   // clang-format off
   // CHECK-LABEL: @builder_blocks
@@ -273,6 +276,8 @@
   Value funcArg(f.getArgument(0));
   Value c32(std_constant_int(32, 32)), c64(std_constant_int(64, 64)),
       c42(std_constant_int(42, 32));
+  ReturnOp ret = std_ret();
+
   Value arg1;
   Value args23[2];
   BlockHandle b1, b2, functionBlock(&f.front());
@@ -282,6 +287,7 @@
   BlockBuilder(functionBlock, Append())([&] {
     std_cond_br(funcArg, b1, {c32}, b2, {c64, c42});
   });
+  ret.erase();
 
   // clang-format off
   // CHECK-LABEL: @builder_cond_branch
diff --git a/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp b/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp
--- a/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp
+++ b/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp
@@ -22,7 +22,7 @@
   void runOnFunction() override {
     OwningRewritePatternList patterns;
     auto *context = &getContext();
-    populateVectorToAffineLoopsConversionPatterns(context, patterns);
+    populateVectorToLoopsConversionPatterns(patterns, context);
     applyPatternsAndFoldGreedily(getFunction(), patterns);
  }
};
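Note for downstream users (outside the patch itself): the entry point is renamed from populateVectorToAffineLoopsConversionPatterns to populateVectorToLoopsConversionPatterns, and its argument order flips to patterns-first. The sketch below mirrors the TestVectorToLoopsConversion pass in this patch to show the new call; the pass/struct name, the PassWrapper base, and the PassRegistration line are illustrative assumptions, not part of the patch.

```
// Minimal sketch of a function pass that runs the renamed pattern set.
// Assumes the PassWrapper/PassRegistration utilities available at the time
// of this patch; only the two populate/apply calls are taken from the diff.
#include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

namespace {
struct LowerVectorTransfersToLoopsPass
    : public PassWrapper<LowerVectorTransfersToLoopsPass, FunctionPass> {
  void runOnFunction() override {
    OwningRewritePatternList patterns;
    // New argument order: patterns first, then the MLIRContext.
    populateVectorToLoopsConversionPatterns(patterns, &getContext());
    applyPatternsAndFoldGreedily(getFunction(), patterns);
  }
};
} // namespace

// Illustrative registration so the pass can be exercised from mlir-opt,
// analogous to -test-convert-vector-to-loops used in the test above.
static PassRegistration<LowerVectorTransfersToLoopsPass>
    pass("lower-vector-transfers-to-loops",
         "Progressively lower vector transfer ops to loops (sketch)");
```

With this in place, N-D vector.transfer_read/write ops whose permutation map is a minor identity are rewritten into loops over 1-D transfers, as checked by the transfer_read_progressive and transfer_write_progressive tests in this patch.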