diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -12,7 +12,6 @@
 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
diff --git a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
deleted file mode 100644
--- a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//===- Intrinsics.h - MLIR EDSC Intrinsics for MemRefOps --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
-#define MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
-
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/StandardOps/EDSC/Builders.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
-#include "mlir/EDSC/Builders.h"
-
-#include "llvm/ADT/SmallVector.h"
-
-namespace mlir {
-namespace edsc {
-namespace intrinsics {
-
-using memref_alloc = ValueBuilder<memref::AllocOp>;
-using memref_alloca = ValueBuilder<memref::AllocaOp>;
-using memref_cast = ValueBuilder<memref::CastOp>;
-using memref_dealloc = OperationBuilder<memref::DeallocOp>;
-using memref_dim = ValueBuilder<memref::DimOp>;
-using memref_load = ValueBuilder<memref::LoadOp>;
-using memref_store = OperationBuilder<memref::StoreOp>;
-using memref_sub_view = ValueBuilder<memref::SubViewOp>;
-using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
-using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
-using memref_view = ValueBuilder<memref::ViewOp>;
-
-} // namespace intrinsics
-} // namespace edsc
-} // namespace mlir
-
-static inline ::llvm::SmallVector<mlir::Value, 8>
-getMemRefSizes(mlir::Value memRef) {
-  using namespace mlir;
-  using namespace mlir::edsc;
-  using namespace mlir::edsc::intrinsics;
-  mlir::MemRefType memRefType = memRef.getType().cast<mlir::MemRefType>();
-  assert(isStrided(memRefType) && "Expected strided MemRef type");
-
-  SmallVector<Value, 8> res;
-  res.reserve(memRefType.getShape().size());
-  const auto &shape = memRefType.getShape();
-  for (unsigned idx = 0, n = shape.size(); idx < n; ++idx) {
-    if (shape[idx] == -1)
-      res.push_back(memref_dim(memRef, idx));
-    else
-      res.push_back(std_constant_index(shape[idx]));
-  }
-  return res;
-}
-
-namespace mlir {
-namespace edsc {
-
-/// A MemRefBoundsCapture represents the information required to step through a
-/// MemRef. It has placeholders for non-contiguous tensors that fit within the
-/// Fortran subarray model.
-/// At the moment it can only capture a MemRef with an identity layout map.
-// TODO: Support MemRefs with layoutMaps.
-class MemRefBoundsCapture : public BoundsCapture {
-public:
-  explicit MemRefBoundsCapture(Value v) {
-    auto memrefSizeValues = getMemRefSizes(v);
-    for (auto s : memrefSizeValues) {
-      lbs.push_back(intrinsics::std_constant_index(0));
-      ubs.push_back(s);
-      steps.push_back(1);
-    }
-  }
-
-  unsigned fastestVarying() const { return rank() - 1; }
-
-private:
-  Value base;
-};
-
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
diff --git a/mlir/include/mlir/IR/ImplicitLocOpBuilder.h b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h
--- a/mlir/include/mlir/IR/ImplicitLocOpBuilder.h
+++ b/mlir/include/mlir/IR/ImplicitLocOpBuilder.h
@@ -63,7 +63,7 @@
   /// Create an operation of specific op type at the current insertion point and
   /// location.
   template <typename OpTy, typename... Args>
-  OpTy create(Args &&... args) {
+  OpTy create(Args &&...args) {
     return OpBuilder::create<OpTy>(curLoc, std::forward<Args>(args)...);
   }

@@ -71,7 +71,7 @@
   /// and immediately try to fold it. This functions populates 'results' with
   /// the results after folding the operation.
   template <typename OpTy, typename... Args>
-  void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&... args) {
+  void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&...args) {
     OpBuilder::createOrFold<OpTy>(results, curLoc, std::forward<Args>(args)...);
   }

@@ -79,7 +79,7 @@
   template <typename OpTy, typename... Args>
   typename std::enable_if<OpTy::template hasTrait<OpTrait::OneResult>(),
                           Value>::type
-  createOrFold(Args &&... args) {
+  createOrFold(Args &&...args) {
     return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
   }

@@ -87,7 +87,7 @@
   template <typename OpTy, typename... Args>
   typename std::enable_if<OpTy::template hasTrait<OpTrait::ZeroResult>(),
                           OpTy>::type
-  createOrFold(Args &&... args) {
+  createOrFold(Args &&...args) {
     return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
   }
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -13,9 +13,9 @@
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/LoopUtils.h"
@@ -41,45 +41,46 @@
 /// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
 /// single-iteration loops. Maps the innermost loops to thread dimensions, in
 /// reverse order to enable access coalescing in the innermost loop.
-static void insertCopyLoops(OpBuilder &b, Location loc,
-                            MemRefBoundsCapture &bounds, Value from, Value to) {
-  // Create EDSC handles for bounds.
-  unsigned rank = bounds.rank();
+static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
+  auto memRefType = from.getType().cast<MemRefType>();
+  auto rank = memRefType.getRank();
+  SmallVector<Value, 4> lbs, ubs, steps;
+  Value zero = b.create<ConstantIndexOp>(0);
+  Value one = b.create<ConstantIndexOp>(1);
   // Make sure we have enough loops to use all thread dimensions, these trivial
   // loops should be outermost and therefore inserted first.
   if (rank < GPUDialect::getNumWorkgroupDimensions()) {
     unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank;
-    Value zero = std_constant_index(0);
-    Value one = std_constant_index(1);
     lbs.resize(extraLoops, zero);
     ubs.resize(extraLoops, one);
     steps.resize(extraLoops, one);
   }
   // Add existing bounds.
-  lbs.append(bounds.getLbs().begin(), bounds.getLbs().end());
-  ubs.append(bounds.getUbs().begin(), bounds.getUbs().end());
-
-  // Emit constant operations for steps.
+  lbs.append(rank, zero);
+  ubs.reserve(lbs.size());
   steps.reserve(lbs.size());
-  llvm::transform(bounds.getSteps(), std::back_inserter(steps),
-                  [](int64_t step) { return std_constant_index(step); });
+  for (auto idx = 0; idx < rank; ++idx) {
+    ubs.push_back(
+        b.createOrFold<memref::DimOp>(from, b.create<ConstantIndexOp>(idx)));
+    steps.push_back(one);
+  }

   // Obtain thread identifiers and block sizes, necessary to map to them.
   auto indexType = b.getIndexType();
   SmallVector<Value, 3> threadIds, blockDims;
   for (unsigned i = 0; i < 3; ++i) {
     auto dimName = b.getStringAttr(getDimName(i));
-    threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
-    blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
+    threadIds.push_back(b.create<gpu::ThreadIdOp>(indexType, dimName));
+    blockDims.push_back(b.create<gpu::BlockDimOp>(indexType, dimName));
   }

   // Produce the loop nest with copies.
   SmallVector<Value, 8> ivs(lbs.size());
   mlir::scf::buildLoopNest(
-      b, loc, lbs, ubs, steps,
+      b, b.getLoc(), lbs, ubs, steps,
       [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
         ivs.assign(loopIvs.begin(), loopIvs.end());
         auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
@@ -142,17 +143,13 @@
   assert(llvm::hasSingleElement(region) &&
          "unstructured control flow not supported");

-  OpBuilder b(region.getContext());
-  b.setInsertionPointToStart(&region.front());
-
-  ScopedContext edscContext(b, loc);
-  MemRefBoundsCapture fromBoundsCapture(from);
-  insertCopyLoops(b, loc, fromBoundsCapture, from, to);
-  b.create<gpu::BarrierOp>(loc);
+  auto b = ImplicitLocOpBuilder::atBlockBegin(loc, &region.front());
+  insertCopyLoops(b, from, to);
+  b.create<gpu::BarrierOp>();

   b.setInsertionPoint(&region.front().back());
-  b.create<gpu::BarrierOp>(loc);
-  insertCopyLoops(b, loc, fromBoundsCapture, to, from);
+  b.create<gpu::BarrierOp>();
+  insertCopyLoops(b, to, from);
 }

 /// Promotes a function argument to workgroup memory in the given function. The
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -18,7 +18,6 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -114,13 +113,13 @@
 /// Fuses the producer by cloning the `producer`. The `fusedLoopsAndRanges`
 /// provides the loop range information for the fused loops. The rest are
 /// obtained from the producer itself, since they are not tiled + fused.
-static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
+static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
                     const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
   SmallVector<Value, 8> ivs, tileSizes, sizeBounds;
   SmallVector<Range, 8> loopRanges;
-  auto zero = std_constant_index(0);
-  auto one = std_constant_index(1);
   Location loc = producer.getLoc();
+  auto zero = b.create<ConstantIndexOp>(loc, 0);
+  auto one = b.create<ConstantIndexOp>(loc, 1);

   for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
     auto it = fusedLoopsAndRanges.find(i);
@@ -133,7 +132,8 @@
                  << loopRanges.back() << "\n");
     } else {
       auto shapeDim = getShapeDefiningLoopRange(producer, i);
-      Value dim = memref_dim(shapeDim.shape, shapeDim.dimension);
+      Value dim = b.createOrFold<memref::DimOp>(loc, shapeDim.shape,
+                                                shapeDim.dimension);
       tileSizes.push_back(zero);
       sizeBounds.push_back(dim);
       loopRanges.push_back(Range{zero, dim, one});
@@ -147,8 +147,8 @@
   // Compute subranges for all tensor input/output operands.
   auto tiledOperands = llvm::to_vector<4>(producer.getShapedOperands());
-  clonedShapes.append(makeTiledShapes(builder, loc, producer, tiledOperands,
-                                      ivs, tileSizes, sizeBounds));
+  clonedShapes.append(makeTiledShapes(b, loc, producer, tiledOperands, ivs,
+                                      tileSizes, sizeBounds));

   // Append the other operands.
   auto operands = producer.getAssumedNonShapedOperands();
@@ -172,7 +172,7 @@
                                                  staticStridesVector));
   }

-  Operation *clonedOp = producer.clone(builder, loc, resultTypes, clonedShapes);
+  Operation *clonedOp = producer.clone(b, loc, resultTypes, clonedShapes);

   // When the producer has index semantics, we have to transform the indices of
   // the producer according to the tiling of the consumer, i.e. offset them by
   // the values computed in `loopRanges`.
@@ -184,11 +184,11 @@
   // Shift all indices by the tile offset.
   Block &block = clonedOp->getRegion(0).front();
   for (IndexOp indexOp : block.getOps<IndexOp>()) {
-    OpBuilder::InsertionGuard g(builder);
-    builder.setInsertionPointAfter(indexOp);
+    OpBuilder::InsertionGuard g(b);
+    b.setInsertionPointAfter(indexOp);
     AffineExpr index, offset;
-    bindDims(builder.getContext(), index, offset);
-    AffineApplyOp applyOp = builder.create<AffineApplyOp>(
+    bindDims(b.getContext(), index, offset);
+    AffineApplyOp applyOp = b.create<AffineApplyOp>(
         indexOp.getLoc(), index + offset,
         ValueRange{indexOp.getResult(), loopRanges[indexOp.dim()].offset});
     indexOp.getResult().replaceAllUsesExcept(applyOp, applyOp);
@@ -770,17 +770,18 @@

 /// Tile the fused loops in the root operation, by setting the tile sizes for
 /// all other loops to zero (those will be tiled later).
-static Optional<TiledLinalgOp> tileRootOperation(
-    OpBuilder &builder, LinalgOp op, ArrayRef<Value> tileSizeVector,
-    const LinalgTilingOptions &options, const std::set<unsigned> &fusedLoops) {
+static Optional<TiledLinalgOp>
+tileRootOperation(OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizeVector,
+                  const LinalgTilingOptions &options,
+                  const std::set<unsigned> &fusedLoops) {
   SmallVector<Value, 4> tileSizes(tileSizeVector.begin(), tileSizeVector.end());
-  auto zero = std_constant_index(0);
+  auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
   for (unsigned i = 0, e = tileSizes.size(); i != e; ++i)
     if (!fusedLoops.count(i))
       tileSizes[i] = zero;
   LinalgTilingOptions tileFusedLoopsOptions = options;
   tileFusedLoopsOptions.setTileSizes(tileSizes);
-  return tileLinalgOp(builder, op, tileFusedLoopsOptions);
+  return tileLinalgOp(b, op, tileFusedLoopsOptions);
 }

 /// Fuse the operations in `fusionCandidates` with `tiledOp`. Latter is expected
@@ -788,19 +789,19 @@
 /// `fusionCandidates`, i.e. move the operation within the inter-tile loops of
 /// `tiledOp`.
 static SmallVector<LinalgOp, 1>
-fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
+fuseOperations(OpBuilder &b, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
                ArrayRef<LinalgOp> fusionCandidates,
                const FusableOpDependencesTy &fusableDependences,
                const std::set<unsigned> &fusedLoops) {
   LinalgOp tiledOp = tiledLinalgOp.op;
-  OpBuilder::InsertionGuard guard(builder);
-  builder.setInsertionPoint(tiledOp);
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPoint(tiledOp);

   DenseMap<unsigned, Range> fusedLoopsAndRanges;
   for (unsigned loop : fusedLoops) {
     ShapeDimension shapeDim = getShapeDefiningLoopRange(tiledOp, loop, true);
     fusedLoopsAndRanges[loop] = getRangeFromOperandShape(
-        builder, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
+        b, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
   }

   SmallVector<LinalgOp, 1> fusedOps(fusionCandidates.size());
@@ -808,13 +809,12 @@
   origOpToFusedOp[rootOp.getOperation()] = tiledOp;
   for (auto candidate : enumerate(llvm::reverse(fusionCandidates))) {
     LinalgOp origOp = candidate.value();
-    LinalgOp fusedOp = fuse(builder, origOp, fusedLoopsAndRanges);
+    LinalgOp fusedOp = fuse(b, origOp, fusedLoopsAndRanges);
     origOpToFusedOp[origOp.getOperation()] = fusedOp;
     fusedOps[fusionCandidates.size() - candidate.index() - 1] = fusedOp;

     // Prepare the builder for the next insertion point.
-    auto guard =
-        llvm::make_scope_exit([&]() { builder.setInsertionPoint(fusedOp); });
+    auto guard = llvm::make_scope_exit([&]() { b.setInsertionPoint(fusedOp); });
     if (!origOp.hasTensorSemantics())
       continue;
@@ -860,7 +860,7 @@

 template <typename LoopType>
 static Optional<TiledAndFusedLinalgOps>
-tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
+tileAndFuseLinalgOpsImpl(OpBuilder &b, ArrayRef<LinalgOp> ops,
                          const LinalgDependenceGraph &dependenceGraph,
                          const LinalgTilingOptions &tilingOptions) {
   if (ops.size() < 2)
@@ -884,9 +884,9 @@
     return llvm::None;
   }

-  OpBuilder::InsertionGuard guard(builder);
-  builder.setInsertionPoint(rootOp);
-  ScopedContext scope(builder, rootOp.getLoc());
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPoint(rootOp);
+  ScopedContext scope(b, rootOp.getLoc());

   // Find all the producers.
   LLVM_DEBUG(llvm::dbgs() << "findAllFusableDependences\n");
@@ -911,9 +911,9 @@

   // Tile the fused loops in the last operation in the list.
   SmallVector<Value, 4> tileSizeVector =
-      tilingOptions.tileSizeComputationFunction(builder, rootOp);
+      tilingOptions.tileSizeComputationFunction(b, rootOp);
   Optional<TiledLinalgOp> tiledRootOp = tileRootOperation(
-      builder, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
+      b, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
   if (!tiledRootOp) {
     rootOp.emitRemark("failed to tile the fused loops");
     return llvm::None;
@@ -922,24 +922,23 @@
   ret.fusedLoops.assign(tiledRootOp->loops.begin(), tiledRootOp->loops.end());

   // Fuse the other operations into the fused inter-tile loops produced above.
-  ret.fusedProducers =
-      fuseOperations(builder, rootOp, *tiledRootOp, ops.drop_back(),
-                     fusableDependences, ret.fusedLoopDims);
+  ret.fusedProducers = fuseOperations(b, rootOp, *tiledRootOp, ops.drop_back(),
+                                      fusableDependences, ret.fusedLoopDims);
   return ret;
 }

 Optional<TiledAndFusedLinalgOps>
-mlir::linalg::tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
+mlir::linalg::tileAndFuseLinalgOps(OpBuilder &b, ArrayRef<LinalgOp> ops,
                                    const LinalgDependenceGraph &dependenceGraph,
                                    const LinalgTilingOptions &tilingOptions) {
   switch (tilingOptions.loopType) {
   case LinalgTilingLoopType::Loops:
-    return tileAndFuseLinalgOpsImpl<scf::ForOp>(builder, ops, dependenceGraph,
-                                                tilingOptions);
+    return tileAndFuseLinalgOpsImpl<scf::ForOp>(b, ops, dependenceGraph,
+                                                tilingOptions);
   case LinalgTilingLoopType::ParallelLoops:
-    return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(
-        builder, ops, dependenceGraph, tilingOptions);
+    return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(b, ops, dependenceGraph,
+                                                     tilingOptions);
   default:;
   }
   return llvm::None;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -75,8 +74,8 @@
   // Create a new range with the applied tile sizes.
   SmallVector<Range, 4> res;
   for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
-    res.push_back(
-        Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]});
+    res.push_back(Range{b.create<ConstantIndexOp>(loc, 0), shapeSizes[idx],
+                        tileSizes[idx]});
   return std::make_tuple(res, loopIndexToRangeIndex);
 }
@@ -330,7 +329,7 @@
   SmallVector<Value, 4> tileSizeVector =
       options.tileSizeComputationFunction(b, op);
   if (tileSizeVector.size() < nLoops) {
-    auto zero = std_constant_index(0);
+    auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
     tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
   }
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -525,10 +525,11 @@
   for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
     bool isTiled = !isZero(tileSizes[idx]);
-    lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
+    lbs.push_back(isTiled ? ivs[idxIvs++]
+                          : (Value)b.create<ConstantIndexOp>(loc, 0));
     // Before composing, we need to make range a closed interval.
     Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
-    subShapeSizes.push_back(size - std_constant_index(1));
+    subShapeSizes.push_back(size - b.create<ConstantIndexOp>(loc, 1));
     LLVM_DEBUG(llvm::dbgs() << "lb: " << lbs.back() << "\n");
     LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
   }
@@ -560,7 +561,7 @@
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
     if (!isTiled(map.getSubMap({r}), tileSizes)) {
       offsets.push_back(b.getIndexAttr(0));
-      Value dim = memref_dim(shapedOp, r).value;
+      Value dim = b.createOrFold<memref::DimOp>(loc, shapedOp, r);
       sizes.push_back(dim);
       strides.push_back(b.getIndexAttr(1));
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
@@ -576,7 +577,7 @@
       offsets.push_back(offset);
       auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
       // Resulting size needs to be made half open interval again.
-      auto size = closedIntSize + std_constant_index(1);
+      auto size = closedIntSize + b.create<ConstantIndexOp>(loc, 1);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
       // The size of the subview / subtensor should be trimmed to avoid
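
Taken together, the hunks above apply one mechanical migration: EDSC intrinsics such as `std_constant_index` and `memref_dim`, which pull their builder and location out of an implicit `ScopedContext`, become explicit `create`/`createOrFold` calls on an `OpBuilder` (or an `ImplicitLocOpBuilder` that carries the location). A minimal sketch of the pattern follows, written against the MLIR APIs used in this patch; the helper function and its name are hypothetical, not part of the change:

// Hypothetical helper illustrating the before/after of this migration.
//
// Before, with EDSC reading builder and location from a ScopedContext:
//   ScopedContext scope(builder, loc);
//   Value zero = std_constant_index(0);
//   Value dim  = memref_dim(shapedOp, r);
//
// After, with builder and location passed explicitly:
static mlir::Value getDimBound(mlir::OpBuilder &b, mlir::Location loc,
                               mlir::Value shapedOp, unsigned r) {
  // createOrFold<memref::DimOp> folds a statically known dimension to a
  // constant immediately instead of materializing a memref.dim operation.
  return b.createOrFold<mlir::memref::DimOp>(loc, shapedOp, r);
}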