diff --git a/mlir/include/mlir/Transforms/LoopFusionUtils.h b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
rename from mlir/include/mlir/Transforms/LoopFusionUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
--- a/mlir/include/mlir/Transforms/LoopFusionUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
-#ifndef MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
-#define MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
 #include "mlir/IR/Value.h"
 #include "mlir/Support/LLVM.h"
@@ -167,4 +167,4 @@
                                  DenseSet<Value> &producerConsumerMemrefs);
 } // namespace mlir
-#endif // MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
rename from mlir/include/mlir/Transforms/LoopUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopUtils.h
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
-#ifndef MLIR_TRANSFORMS_LOOPUTILS_H
-#define MLIR_TRANSFORMS_LOOPUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPUTILS_H
 #include "mlir/IR/Block.h"
 #include "mlir/Support/LLVM.h"
@@ -321,9 +321,6 @@
 separateFullTiles(MutableArrayRef<AffineForOp> nest,
                   SmallVectorImpl<AffineForOp> *fullTileNest = nullptr);
-/// Move loop invariant code out of `looplike`.
-LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
-
 } // namespace mlir
-#endif // MLIR_TRANSFORMS_LOOPUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPUTILS_H
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -21,6 +21,10 @@
 class AffineForOp;
+/// Fusion mode to attempt. The default mode `Greedy` does both
+/// producer-consumer and sibling fusion.
+enum FusionMode { Greedy, ProducerConsumer, Sibling };
+
 /// Creates a simplification pass for affine structures (maps and sets). In
 /// addition, this pass also normalizes memrefs to have the trivial (identity)
 /// layout map.
@@ -53,6 +57,19 @@
 /// dead allocs.
 std::unique_ptr<OperationPass<FuncOp>> createAffineScalarReplacementPass();
+/// Creates a pass that transforms perfectly nested loops with independent
+/// bounds into a single loop.
+std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
+
+/// Creates a loop fusion pass which fuses loops according to the type of
+/// fusion specified in `fusionMode`. Buffers of size less than or equal to
+/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
+std::unique_ptr<OperationPass<FuncOp>>
+createLoopFusionPass(unsigned fastMemorySpace = 0,
+                     uint64_t localBufSizeThreshold = 0,
+                     bool maximalFusion = false,
+                     enum FusionMode fusionMode = FusionMode::Greedy);
+
 /// Creates a pass to perform tiling on loop nests.
 std::unique_ptr<OperationPass<FuncOp>>
 createLoopTilingPass(uint64_t cacheSizeBytes);
@@ -76,6 +93,10 @@
 std::unique_ptr<OperationPass<FuncOp>>
 createLoopUnrollAndJamPass(int unrollJamFactor = -1);
+/// Creates a pass to pipeline explicit movement of data across levels of the
+/// memory hierarchy.
+std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
+
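With the declarations relocated, client code now constructs these passes from the Affine-dialect header. A minimal sketch of wiring them into a pass manager (the pipeline composition and the `mlir::FuncOp` nesting are illustrative, not part of this patch):

```c++
// Illustrative only: nest the relocated FuncOp passes under a module-level
// pass manager, using the new header location introduced by this patch.
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"

void buildAffineLoopPipeline(mlir::PassManager &pm) {
  // Greedy fusion: both producer-consumer and sibling fusion.
  pm.addNestedPass<mlir::FuncOp>(mlir::createLoopFusionPass(
      /*fastMemorySpace=*/0, /*localBufSizeThreshold=*/0,
      /*maximalFusion=*/false, mlir::FusionMode::Greedy));
  // Overlap non-blocking DMA transfers with compute via double buffering.
  pm.addNestedPass<mlir::FuncOp>(mlir::createPipelineDataTransferPass());
  // Collapse perfect nests with independent bounds into a single loop.
  pm.addNestedPass<mlir::FuncOp>(mlir::createLoopCoalescingPass());
}
```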
 /// Creates a pass to vectorize loops, operations and data types using a
 /// target-independent, n-D super-vector abstraction.
 std::unique_ptr<OperationPass<FuncOp>>
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -43,6 +43,138 @@
   ];
 }
+def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
+  let summary = "Fuse affine loop nests";
+  let description = [{
+    This pass performs fusion of loop nests using a slicing-based approach. It
+    combines two fusion strategies: producer-consumer fusion and sibling
+    fusion. Producer-consumer fusion is aimed at fusing pairs of loops where
+    the first one writes to a memref that the second reads. Sibling fusion
+    targets pairs of loops that share no dependences between them but that
+    load from the same memref. The fused loop nests, when possible, are
+    rewritten to access significantly smaller local buffers instead of the
+    original memrefs, and the latter are often either completely optimized
+    away or contracted. This transformation leads to enhanced locality and
+    lower memory footprint through the elimination or contraction of
+    temporaries/intermediate memrefs. These benefits are sometimes achieved at
+    the expense of redundant computation, with a cost model evaluating the
+    available choices, such as the depth at which a source slice should be
+    materialized in the destination slice.
+
+    Example 1: Producer-consumer fusion.
+    Input:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<10xf32>
+      %1 = memref.alloc() : memref<10xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[%arg2] : memref<10xf32>
+        affine.store %cst, %1[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %0[%arg2] : memref<10xf32>
+        %3 = arith.addf %2, %2 : f32
+        affine.store %3, %arg0[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %1[%arg2] : memref<10xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<1xf32>
+      %1 = memref.alloc() : memref<1xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[0] : memref<1xf32>
+        affine.store %cst, %1[0] : memref<1xf32>
+        %2 = affine.load %1[0] : memref<1xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+        %4 = affine.load %0[0] : memref<1xf32>
+        %5 = arith.addf %4, %4 : f32
+        affine.store %5, %arg0[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+
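An aside on selecting a strategy: besides the greedy default, the constructor above (and the `mode` pass option declared further below) can restrict fusion to one strategy. A hedged sketch, reusing the pass manager from the earlier snippet:

```c++
// Illustrative only: producer-consumer fusion alone. This is roughly what
// the `mode=producer` pass option selects from the command line.
pm.addNestedPass<mlir::FuncOp>(mlir::createLoopFusionPass(
    /*fastMemorySpace=*/0, /*localBufSizeThreshold=*/0,
    /*maximalFusion=*/false, mlir::FusionMode::ProducerConsumer));
```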
+    Example 2: Sibling fusion.
+    Input:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.addf %0, %1 : f32
+          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %5 = arith.addf %3, %4 : f32
+          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+  }];
+  let constructor = "mlir::createLoopFusionPass()";
+  let options = [
+    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+           /*default=*/"0.30f", "Fractional increase in additional computation "
+           "tolerated while fusing">,
+    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+           /*default=*/"0",
+           "Faster memory space number to promote fusion buffers to">,
+    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+           "to fast memory space">,
+    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+           "Enables maximal loop fusion">,
+    Option<"affineFusionMode", "mode", "enum FusionMode",
+           "mlir::FusionMode::Greedy", "fusion mode to attempt",
+           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
+           " \"greedy\", \"Perform greedy (both producer-consumer and sibling) fusion\"), "
+           "clEnumValN(mlir::FusionMode::ProducerConsumer, "
+           "\"producer\", \"Perform only producer-consumer fusion\"), "
+           "clEnumValN(mlir::FusionMode::Sibling, "
+           "\"sibling\", \"Perform only sibling fusion\"))">,
+  ];
+  let dependentDialects = ["memref::MemRefDialect"];
+}
+
 def AffineLoopInvariantCodeMotion
     : Pass<"affine-loop-invariant-code-motion", "FuncOp"> {
   let summary = "Hoist loop invariant instructions outside of affine loops";
@@ -94,6 +226,75 @@
   ];
 }
+def AffinePipelineDataTransfer
+    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
+  let summary = "Pipeline non-blocking data transfers between explicitly "
+                "managed levels of the memory hierarchy";
+  let description = [{
+    This pass performs a transformation to overlap non-blocking DMA operations
+    in a loop with computations through double buffering. This is achieved by
+    advancing dma_start operations with respect to other operations.
+
+    Input
+
+    ```mlir
+    func @pipelinedatatransfer() {
+      %0 = memref.alloc() : memref<256xf32>
+      %1 = memref.alloc() : memref<32xf32, 1>
+      %2 = memref.alloc() : memref<1xf32>
+      %c0 = arith.constant 0 : index
+      %c128 = arith.constant 128 : index
+      affine.for %i0 = 0 to 8 {
+        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
+        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
+        %3 = affine.load %1[%i0] : memref<32xf32, 1>
+        %4 = "compute"(%3) : (f32) -> f32
+        affine.store %4, %1[%i0] : memref<32xf32, 1>
+      }
+      return
+    }
+    ```
+
+    Output
+
+    ```mlir
+    module {
+      func @pipelinedatatransfer() {
+        %c8 = arith.constant 8 : index
+        %c0 = arith.constant 0 : index
+        %0 = memref.alloc() : memref<256xf32>
+        %c0_0 = arith.constant 0 : index
+        %c128 = arith.constant 128 : index
+        %1 = memref.alloc() : memref<2x32xf32, 1>
+        %2 = memref.alloc() : memref<2x1xf32>
+        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+        affine.for %arg0 = 1 to 8 {
+          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+          %8 = affine.apply #map3(%arg0)
+          %9 = affine.apply #map4(%8)
+          %10 = affine.apply #map4(%8)
+          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+          %12 = "compute"(%11) : (f32) -> f32
+          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+        }
+        %3 = affine.apply #map3(%c8)
+        %4 = affine.apply #map4(%3)
+        %5 = affine.apply #map4(%3)
+        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        %7 = "compute"(%6) : (f32) -> f32
+        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        memref.dealloc %2 : memref<2x1xf32>
+        memref.dealloc %1 : memref<2x32xf32, 1>
+        return
+      }
+    }
+    ```
+  }];
+  let constructor = "mlir::createPipelineDataTransferPass()";
+}
+
 def AffineScalarReplacement : Pass<"affine-scalrep", "FuncOp"> {
   let summary = "Replace affine memref accesses by scalars by forwarding stores "
                 "to loads and eliminating redundant loads";
@@ -184,6 +385,13 @@
   let constructor = "mlir::createAffineLoopNormalizePass()";
 }
+def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
+  let summary = "Coalesce nested loops with independent bounds into a single "
+                "loop";
+  let constructor = "mlir::createLoopCoalescingPass()";
+  let dependentDialects = ["arith::ArithmeticDialect"];
+}
+
 def SimplifyAffineStructures : Pass<"simplify-affine-structures", "FuncOp"> {
   let summary = "Simplify affine expressions in maps/sets and normalize "
                 "memrefs";
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -24,6 +24,10 @@
 class Operation;
 class PostDominanceInfo;
+namespace memref {
+class AllocOp;
+} // namespace memref
+
 struct LogicalResult;
 using ReductionLoopMap = DenseMap<Value, SmallVector<AffineForOp, 2>>;
@@ -168,6 +172,121 @@
 AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                         AffineExpr max, bool positivePath = true);
+/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
+/// optionally remapping the old memref's indices using the supplied affine
+/// map, `indexRemap`. The new memref could be of a different shape or rank.
+/// `extraIndices` provides any additional access indices to be added to the
+/// start.
+///
+/// `indexRemap` remaps indices of the old memref access to a new set of
+/// indices that are used to index the memref. Additional input operands to
+/// indexRemap can be optionally provided in `extraOperands`, and they occupy
+/// the start of its input list. `indexRemap`'s dimensional inputs are
+/// expected to correspond to the memref's indices, and its symbolic inputs,
+/// if any, should be provided in `symbolOperands`.
+///
+/// `domOpFilter`, if non-null, restricts the replacement to only those
+/// operations that are dominated by the former; similarly, `postDomOpFilter`
+/// restricts replacement to only those operations that are postdominated by
+/// it.
+///
+/// 'allowNonDereferencingOps', if set, allows replacement of
+/// non-dereferencing uses of a memref without any requirement for access
+/// index rewrites as long as the user operation has the MemRefsNormalizable
+/// trait. The default value of this flag is false.
+///
+/// 'replaceInDeallocOp', if set, allows DeallocOp, a non-dereferencing user,
+/// to also be a candidate for replacement. The default value of this flag is
+/// false.
+///
+/// Returns failure if the replacement is not possible, i.e., whenever a
+/// memref is used as an operand in a non-dereferencing context and
+/// 'allowNonDereferencingOps' is false; dealloc's on the memref are left
+/// untouched. See comments at function definition for an example.
+//
+// Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
+// The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
+// the index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap =
+// (d0, d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without
+// any extra operands, note that 'indexRemap' would just be applied to the
+// existing indices (%i, %j).
+// TODO: allow extraIndices to be added at any position.
+LogicalResult replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
+    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
+    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
+    Operation *postDomOpFilter = nullptr,
+    bool allowNonDereferencingOps = false, bool replaceInDeallocOp = false);
+
+/// Performs the same replacement as the other version above but only for the
+/// dereferencing uses of `oldMemRef` in `op`, except in cases where
+/// 'allowNonDereferencingOps' is set to true, in which case we replace the
+/// non-dereferencing uses as well.
+LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
+                                       Operation *op,
+                                       ArrayRef<Value> extraIndices = {},
+                                       AffineMap indexRemap = AffineMap(),
+                                       ArrayRef<Value> extraOperands = {},
+                                       ArrayRef<Value> symbolOperands = {},
+                                       bool allowNonDereferencingOps = false);
+
+/// Rewrites the memref defined by this alloc op to have an identity layout
+/// map and updates all its indexing uses. Returns failure if any of its uses
+/// escape (while leaving the IR in a valid state).
+LogicalResult normalizeMemRef(memref::AllocOp *op);
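The doxygen example above translates into roughly the following call; this is a hypothetical sketch in which `b` (an OpBuilder), `oldMemRef`, `newMemRef`, `tMod2` (the SSA value for `%t mod 2`), and `ii` are assumed to already be in scope:

```c++
// Sketch: rewrite load %A[%i, %j] into load %Abuf[%t mod 2, %ii - %i, %j].
mlir::AffineExpr d0 = b.getAffineDimExpr(0), d1 = b.getAffineDimExpr(1),
                 d2 = b.getAffineDimExpr(2);
// (d0, d1, d2) -> (d0 - d1, d2): d0 is the extra operand %ii; d1 and d2 are
// the old access indices (%i, %j).
mlir::AffineMap indexRemap = mlir::AffineMap::get(
    /*dimCount=*/3, /*symbolCount=*/0, {d0 - d1, d2}, b.getContext());
if (failed(mlir::replaceAllMemRefUsesWith(oldMemRef, newMemRef,
                                          /*extraIndices=*/{tMod2}, indexRemap,
                                          /*extraOperands=*/{ii})))
  return mlir::failure();
```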
+
+/// Uses the old memref type map layout and computes the new memref type to
+/// have a new shape and a layout map, where the old layout map has been
+/// normalized to an identity layout map. It returns the old memref type in
+/// case no normalization was needed or a failure occurs while transforming
+/// the old map layout to an identity layout map.
+MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
+                               unsigned numSymbolicOperands);
+
+/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
+/// its results equal to the number of operands, as a composition
+/// of all other AffineApplyOps reachable from input parameter 'operands'. If
+/// different operands were drawing results from multiple affine apply ops,
+/// these will also be collected into a single (multi-result) affine apply op.
+/// The final results of the composed AffineApplyOp are returned in output
+/// parameter 'results'. Returns the affine apply op created.
+Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
+                                       ArrayRef<Value> operands,
+                                       ArrayRef<Operation *> affineApplyOps,
+                                       SmallVectorImpl<Value> *results);
+
+/// Given an operation, inserts one or more single-result affine apply
+/// operations, results of which are exclusively used by this operation.
+/// The operands of these newly created affine apply ops are
+/// guaranteed to be loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %v = "compute"(%idx, ...)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   %v = "compute"(%idx_, ...)
+///
+/// This allows the application of different transformations on send and
+/// compute (e.g., different shifts/delays).
+///
+/// Fills `sliceOps` with the list of affine.apply operations.
+/// In the following cases, `sliceOps` remains empty:
+///   1. If none of opInst's operands were the result of an affine.apply
+///      (i.e., there was no affine computation slice to create).
+///   2. If all the affine.apply ops supplying operands to this opInst did not
+///      have any uses other than those in this opInst.
+void createAffineComputationSlice(Operation *opInst,
+                                  SmallVectorImpl<AffineApplyOp> *sliceOps);
+
 } // namespace mlir
 #endif // MLIR_DIALECT_AFFINE_UTILS_H
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@@ -32,6 +32,10 @@
 /// inside of scf.for loops with known lower and upper bounds.
 std::unique_ptr<Pass> createSCFForLoopCanonicalizationPass();
+/// Creates a pass that transforms a single ParallelLoop over N induction
+/// variables into another ParallelLoop over fewer than N induction variables.
+std::unique_ptr<Pass> createParallelLoopCollapsingPass();
+
 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr<Pass> createParallelLoopFusionPass();
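The collapsing pass is driven by the `collapsed-indices-*` list options declared in the TableGen def below, so it is most naturally configured through a textual pipeline. A hedged sketch (the nesting name `builtin.func` and the exact option grouping depend on the MLIR revision in use):

```c++
// Sketch: configuring parallel-loop-collapsing via a pipeline string.
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"

mlir::LogicalResult runCollapsing(mlir::MLIRContext &context) {
  mlir::PassManager pm(&context);
  // Combine loop indices 0 and 1 into the position-0 induction variable,
  // and index 2 into the position-1 induction variable.
  return mlir::parsePassPipeline(
      "builtin.func(parallel-loop-collapsing{collapsed-indices-0=0,1 "
      "collapsed-indices-1=2})",
      pm);
}
```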
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -52,6 +52,22 @@
   let constructor = "mlir::createParallelLoopFusionPass()";
 }
+def SCFParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
+  let summary = "Collapse parallel loops to use fewer induction variables";
+  let constructor = "mlir::createParallelLoopCollapsingPass()";
+  let options = [
+    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
+               "Which loop indices to combine into the position 0 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
+               "Which loop indices to combine into the position 1 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
+               "Which loop indices to combine into the position 2 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+  ];
+}
+
 def SCFParallelLoopSpecialization
     : Pass<"parallel-loop-specialization", "FuncOp"> {
   let summary = "Specialize parallel loops for vectorization";
diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
--- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h
+++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
@@ -15,7 +15,20 @@
 #include "mlir/IR/OpDefinition.h"
+//===----------------------------------------------------------------------===//
+// LoopLike Interfaces
+//===----------------------------------------------------------------------===//
+
 /// Include the generated interface declarations.
 #include "mlir/Interfaces/LoopLikeInterface.h.inc"
+
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+/// Move loop invariant code out of a `looplike` operation.
+LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
+} // namespace mlir
+
 #endif // MLIR_INTERFACES_LOOPLIKEINTERFACE_H_
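Since `moveLoopInvariantCode` now lives alongside the interface rather than in the affine loop utilities, any op implementing `LoopLikeOpInterface` can reuse it without pulling in dialect-specific headers. A minimal sketch of the intended usage (the wrapper function is hypothetical):

```c++
// Sketch: hoisting invariants out of any loop-like operation.
#include "mlir/Interfaces/LoopLikeInterface.h"

mlir::LogicalResult hoistInvariants(mlir::Operation *op) {
  // The interface-based entry point works for affine.for, scf.for, etc.
  if (auto loopLike = llvm::dyn_cast<mlir::LoopLikeOpInterface>(op))
    return mlir::moveLoopInvariantCode(loopLike);
  return mlir::failure();
}
```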
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -22,13 +22,8 @@
 namespace mlir {
-class AffineForOp;
 class GreedyRewriteConfig;
-/// Fusion mode to attempt. The default mode `Greedy` does both
-/// producer-consumer and sibling fusion.
-enum FusionMode { Greedy, ProducerConsumer, Sibling };
-
 //===----------------------------------------------------------------------===//
 // Passes
 //===----------------------------------------------------------------------===//
@@ -53,31 +48,10 @@
 /// Creates a pass to perform common sub expression elimination.
 std::unique_ptr<Pass> createCSEPass();
-/// Creates a loop fusion pass which fuses loops according to type of fusion
-/// specified in `fusionMode`. Buffers of size less than or equal to
-/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
-std::unique_ptr<OperationPass<FuncOp>>
-createLoopFusionPass(unsigned fastMemorySpace = 0,
-                     uint64_t localBufSizeThreshold = 0,
-                     bool maximalFusion = false,
-                     enum FusionMode fusionMode = FusionMode::Greedy);
-
 /// Creates a loop invariant code motion pass that hoists loop invariant
 /// instructions out of the loop.
 std::unique_ptr<Pass> createLoopInvariantCodeMotionPass();
-/// Creates a pass to pipeline explicit movement of data across levels of the
-/// memory hierarchy.
-std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
-
-/// Creates a pass that transforms perfectly nested loops with independent
-/// bounds into a single loop.
-std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
-
-/// Creates a pass that transforms a single ParallelLoop over N induction
-/// variables into another ParallelLoop over less than N induction variables.
-std::unique_ptr<Pass> createParallelLoopCollapsingPass();
-
 /// Creates a pass to strip debug information from a function.
 std::unique_ptr<Pass> createStripDebugInfoPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -16,207 +16,6 @@
 include "mlir/Pass/PassBase.td"
 include "mlir/Rewrite/PassUtil.td"
-def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
-  let summary = "Fuse affine loop nests";
-  let description = [{
-    This pass performs fusion of loop nests using a slicing-based approach. It
-    combines two fusion strategies: producer-consumer fusion and sibling fusion.
-    Producer-consumer fusion is aimed at fusing pairs of loops where the first
-    one writes to a memref that the second reads. Sibling fusion targets pairs
-    of loops that share no dependences between them but that load from the same
-    memref. The fused loop nests, when possible, are rewritten to access
-    significantly smaller local buffers instead of the original memref's, and
-    the latter are often either completely optimized away or contracted. This
-    transformation leads to enhanced locality and lower memory footprint through
-    the elimination or contraction of temporaries/intermediate memref's. These
-    benefits are sometimes achieved at the expense of redundant computation
-    through a cost model that evaluates available choices such as the depth at
-    which a source slice should be materialized in the designation slice.
-
-    Example 1: Producer-consumer fusion.
-    Input:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<10xf32>
-      %1 = memref.alloc() : memref<10xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[%arg2] : memref<10xf32>
-        affine.store %cst, %1[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %0[%arg2] : memref<10xf32>
-        %3 = arith.addf %2, %2 : f32
-        affine.store %3, %arg0[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %1[%arg2] : memref<10xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<1xf32>
-      %1 = memref.alloc() : memref<1xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[0] : memref<1xf32>
-        affine.store %cst, %1[0] : memref<1xf32>
-        %2 = affine.load %1[0] : memref<1xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-        %4 = affine.load %0[0] : memref<1xf32>
-        %5 = arith.addf %4, %4 : f32
-        affine.store %5, %arg0[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-
-    Example 2: Sibling fusion.
-    Input:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.addf %0, %1 : f32
-          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %5 = arith.addf %3, %4 : f32
-          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-  }];
-  let constructor = "mlir::createLoopFusionPass()";
-  let options = [
-    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
-           /*default=*/"0.30f", "Fractional increase in additional computation "
-           "tolerated while fusing">,
-    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
-           /*default=*/"0",
-           "Faster memory space number to promote fusion buffers to">,
-    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
-           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
-           "to fast memory space">,
-    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
-           "Enables maximal loop fusion">,
-    Option<"affineFusionMode", "mode", "enum FusionMode",
-           "mlir::FusionMode::Greedy", "fusion mode to attempt",
-           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
-           " \"greedy\", \"Perform greedy (both producer-consumer and sibling) fusion\"), "
-           "clEnumValN( mlir::FusionMode::ProducerConsumer, "
-           "\"producer\", \"Perform only producer-consumer fusion\"), "
-           "clEnumValN( mlir::FusionMode::Sibling, "
-           "\"sibling\", \"Perform only sibling fusion\"))">,
-  ];
-  let dependentDialects = ["memref::MemRefDialect"];
-}
-
-def AffinePipelineDataTransfer
-    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
-  let summary = "Pipeline non-blocking data transfers between explicitly "
-                "managed levels of the memory hierarchy";
-  let description = [{
-    This pass performs a transformation to overlap non-blocking DMA operations
-    in a loop with computations through double buffering. This is achieved by
-    advancing dma_start operations with respect to other operations.
-
-    Input
-
-    ```mlir
-    func @pipelinedatatransfer() {
-      %0 = memref.alloc() : memref<256xf32>
-      %1 = memref.alloc() : memref<32xf32, 1>
-      %2 = memref.alloc() : memref<1xf32>
-      %c0 = arith.constant 0 : index
-      %c128 = arith.constant 128 : index
-      affine.for %i0 = 0 to 8 {
-        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
-        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
-        %3 = affine.load %1[%i0] : memref<32xf32, 1>
-        %4 = "compute"(%3) : (f32) -> f32
-        affine.store %4, %1[%i0] : memref<32xf32, 1>
-      }
-      return
-    }
-    ```
-
-    Output
-
-    ```mlir
-    module {
-      func @pipelinedatatransfer() {
-        %c8 = arith.constant 8 : index
-        %c0 = arith.constant 0 : index
-        %0 = memref.alloc() : memref<256xf32>
-        %c0_0 = arith.constant 0 : index
-        %c128 = arith.constant 128 : index
-        %1 = memref.alloc() : memref<2x32xf32, 1>
-        %2 = memref.alloc() : memref<2x1xf32>
-        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-        affine.for %arg0 = 1 to 8 {
-          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-          %8 = affine.apply #map3(%arg0)
-          %9 = affine.apply #map4(%8)
-          %10 = affine.apply #map4(%8)
-          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-          %12 = "compute"(%11) : (f32) -> f32
-          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-        }
-        %3 = affine.apply #map3(%c8)
-        %4 = affine.apply #map4(%3)
-        %5 = affine.apply #map4(%3)
-        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        %7 = "compute"(%6) : (f32) -> f32
-        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        memref.dealloc %2 : memref<2x1xf32>
-        memref.dealloc %1 : memref<2x32xf32, 1>
-        return
-      }
-    }
-    ```
-  }];
-  let constructor = "mlir::createPipelineDataTransferPass()";
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let description = [{
@@ -315,34 +114,11 @@
   ];
 }
-def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
-  let summary = "Coalesce nested loops with independent bounds into a single "
-                "loop";
-  let constructor = "mlir::createLoopCoalescingPass()";
-  let dependentDialects = ["arith::ArithmeticDialect"];
-}
-
 def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let summary = "Hoist loop invariant instructions outside of the loop";
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
-def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
-  let summary = "Collapse parallel loops to use less induction variables";
-  let constructor = "mlir::createParallelLoopCollapsingPass()";
-  let options = [
-    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
-               "Which loop indices to combine 0th loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
-               "Which loop indices to combine into the position 1 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
-               "Which loop indices to combine into the position 2 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-  ];
-}
-
 def PrintOpStats : Pass<"print-op-stats"> {
   let summary = "Print statistics of operations";
   let constructor = "mlir::createPrintOpStatsPass()";
diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h
deleted file mode 100644
--- a/mlir/include/mlir/Transforms/Utils.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//===- Utils.h - General transformation utilities ---------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file defines prototypes for various transformation utilities for
-// memref's and non-loop IR structures. These are not passes by themselves but
-// are used either by passes, optimization sequences, or in turn by other
-// transformation utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_TRANSFORMS_UTILS_H
-#define MLIR_TRANSFORMS_UTILS_H
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/AffineMap.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-
-namespace mlir {
-
-class AffineApplyOp;
-class AffineForOp;
-class Location;
-class OpBuilder;
-
-namespace memref {
-class AllocOp;
-} // namespace memref
-
-/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
-/// optionally remapping the old memref's indices using the supplied affine map,
-/// `indexRemap`. The new memref could be of a different shape or rank.
-/// `extraIndices` provides any additional access indices to be added to the
-/// start.
-///
-/// `indexRemap` remaps indices of the old memref access to a new set of indices
-/// that are used to index the memref. Additional input operands to indexRemap
-/// can be optionally provided in `extraOperands`, and they occupy the start
-/// of its input list. `indexRemap`'s dimensional inputs are expected to
-/// correspond to memref's indices, and its symbolic inputs if any should be
-/// provided in `symbolOperands`.
-///
-/// `domOpFilter`, if non-null, restricts the replacement to only those
-/// operations that are dominated by the former; similarly, `postDomOpFilter`
-/// restricts replacement to only those operations that are postdominated by it.
-///
-/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
-/// uses of a memref without any requirement for access index rewrites as long
-/// as the user operation has the MemRefsNormalizable trait. The default value
-/// of this flag is false.
-///
-/// 'replaceInDeallocOp', if set, lets DeallocOp, a non-dereferencing user, to
-/// also be a candidate for replacement. The default value of this flag is
-/// false.
-///
-/// Returns true on success and false if the replacement is not possible,
-/// whenever a memref is used as an operand in a non-dereferencing context and
-/// 'allowNonDereferencingOps' is false, except for dealloc's on the memref
-/// which are left untouched. See comments at function definition for an
-/// example.
-//
-// Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
-// The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
-// index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
-// d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
-// extra operands, note that 'indexRemap' would just be applied to existing
-// indices (%i, %j).
-// TODO: allow extraIndices to be added at any position.
-LogicalResult replaceAllMemRefUsesWith(
-    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
-    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
-    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
-    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
-    bool replaceInDeallocOp = false);
-
-/// Performs the same replacement as the other version above but only for the
-/// dereferencing uses of `oldMemRef` in `op`, except in cases where
-/// 'allowNonDereferencingOps' is set to true where we replace the
-/// non-dereferencing uses as well.
-LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
-                                       Operation *op,
-                                       ArrayRef<Value> extraIndices = {},
-                                       AffineMap indexRemap = AffineMap(),
-                                       ArrayRef<Value> extraOperands = {},
-                                       ArrayRef<Value> symbolOperands = {},
-                                       bool allowNonDereferencingOps = false);
-
-/// Rewrites the memref defined by this alloc op to have an identity layout map
-/// and updates all its indexing uses. Returns failure if any of its uses
-/// escape (while leaving the IR in a valid state).
-LogicalResult normalizeMemRef(memref::AllocOp *op);
-
-/// Uses the old memref type map layout and computes the new memref type to have
-/// a new shape and a layout map, where the old layout map has been normalized
-/// to an identity layout map. It returns the old memref in case no
-/// normalization was needed or a failure occurs while transforming the old map
-/// layout to an identity layout map.
-MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
-                               unsigned numSymbolicOperands);
-
-/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
-/// its results equal to the number of operands, as a composition
-/// of all other AffineApplyOps reachable from input parameter 'operands'. If
-/// different operands were drawing results from multiple affine apply ops,
-/// these will also be collected into a single (multi-result) affine apply op.
-/// The final results of the composed AffineApplyOp are returned in output
-/// parameter 'results'. Returns the affine apply op created.
-Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
-                                       ArrayRef<Value> operands,
-                                       ArrayRef<Operation *> affineApplyOps,
-                                       SmallVectorImpl<Value> *results);
-
-/// Given an operation, inserts one or more single result affine apply
-/// operations, results of which are exclusively used by this operation.
-/// The operands of these newly created affine apply ops are
-/// guaranteed to be loop iterators or terminal symbols of a function.
-///
-/// Before
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %v = "compute"(%idx, ...)
-///
-/// After
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
-///   %v = "compute"(%idx_, ...)
-
-/// This allows the application of different transformations on send and
-/// compute (for eg. different shifts/delays)
-///
-/// Fills `sliceOps` with the list of affine.apply operations.
-/// In the following cases, `sliceOps` remains empty:
-///   1. If none of opInst's operands were the result of an affine.apply
-///      (i.e., there was no affine computation slice to create).
-///   2. If all the affine.apply op's supplying operands to this opInst did not
-///      have any uses other than those in this opInst.
-void createAffineComputationSlice(Operation *opInst,
-                                  SmallVectorImpl<AffineApplyOp> *sliceOps);
-
-} // namespace mlir
-
-#endif // MLIR_TRANSFORMS_UTILS_H
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
@@ -27,7 +28,6 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/Sequence.h"
diff --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
--- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
+++ b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "../PassDetail.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -23,7 +24,6 @@
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 using namespace mlir;
 using namespace mlir::scf;
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Conversion/LLVMCommon/VectorPattern.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -33,7 +34,6 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -22,12 +22,12 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -17,13 +17,13 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -18,10 +18,10 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h.inc"
 #include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/Support/Debug.h"
 #include <deque>
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -4,9 +4,12 @@
   AffineLoopNormalize.cpp
   AffineParallelize.cpp
   AffineScalarReplacement.cpp
+  LoopCoalescing.cpp
+  LoopFusion.cpp
   LoopTiling.cpp
   LoopUnroll.cpp
   LoopUnrollAndJam.cpp
+  PipelineDataTransfer.cpp
   SuperVectorize.cpp
   SimplifyAffineStructures.cpp
diff --git a/mlir/lib/Transforms/LoopCoalescing.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
rename from mlir/lib/Transforms/LoopCoalescing.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
--- a/mlir/lib/Transforms/LoopCoalescing.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
@@ -8,9 +8,9 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
rename from mlir/lib/Transforms/LoopFusion.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -16,14 +16,14 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -17,11 +17,11 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -12,11 +12,11 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -37,12 +37,12 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
--- a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Pass/Pass.h"
 namespace mlir {
 template <typename ConcreteDialect>
 void registerDialect(DialectRegistry &registry);
@@ -16,6 +17,10 @@
+namespace arith {
+class ArithmeticDialect;
+} // namespace arith
+
 namespace linalg {
 class LinalgDialect;
 } // namespace linalg
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
rename from mlir/lib/Transforms/PipelineDataTransfer.cpp
rename to mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
@@ -11,17 +11,16 @@
 //===----------------------------------------------------------------------===//
 #include "PassDetail.h"
-#include "mlir/Transforms/Passes.h"
-
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
+#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
@@ -14,9 +14,9 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/Utils.h"
 #define DEBUG_TYPE "simplify-affine-structure"
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_mlir_dialect_library(MLIRAffineUtils
+  LoopFusionUtils.cpp
+  LoopUtils.cpp
   Utils.cpp
   ADDITIONAL_HEADER_DIRS
@@ -7,5 +9,6 @@
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAnalysis
+  MLIRMemRef
   MLIRTransformUtils
 )
diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
rename from mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
--- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
@@ -10,21 +10,20 @@
 //
 //===----------------------------------------------------------------------===//
-#include "mlir/Transforms/LoopFusionUtils.h"
-
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
rename from mlir/lib/Transforms/Utils/LoopUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -10,14 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
-#include "mlir/Transforms/LoopUtils.h"
-
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/BlockAndValueMapping.h" @@ -25,7 +25,6 @@ #include "mlir/Support/MathExtras.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/RegionUtils.h" -#include "mlir/Transforms/Utils.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -16,12 +16,14 @@ #include "mlir/Dialect/Affine/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/IntegerSet.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/LoopUtils.h" + +#define DEBUG_TYPE "affine-utils" using namespace mlir; @@ -856,3 +858,740 @@ defOp->erase(); } } + +// Perform the replacement in `op`. +LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, + Operation *op, + ArrayRef extraIndices, + AffineMap indexRemap, + ArrayRef extraOperands, + ArrayRef symbolOperands, + bool allowNonDereferencingOps) { + unsigned newMemRefRank = newMemRef.getType().cast().getRank(); + (void)newMemRefRank; // unused in opt mode + unsigned oldMemRefRank = oldMemRef.getType().cast().getRank(); + (void)oldMemRefRank; // unused in opt mode + if (indexRemap) { + assert(indexRemap.getNumSymbols() == symbolOperands.size() && + "symbolic operand count mismatch"); + assert(indexRemap.getNumInputs() == + extraOperands.size() + oldMemRefRank + symbolOperands.size()); + assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank); + } else { + assert(oldMemRefRank + extraIndices.size() == newMemRefRank); + } + + // Assert same elemental type. + assert(oldMemRef.getType().cast().getElementType() == + newMemRef.getType().cast().getElementType()); + + SmallVector usePositions; + for (const auto &opEntry : llvm::enumerate(op->getOperands())) { + if (opEntry.value() == oldMemRef) + usePositions.push_back(opEntry.index()); + } + + // If memref doesn't appear, nothing to do. + if (usePositions.empty()) + return success(); + + if (usePositions.size() > 1) { + // TODO: extend it for this case when needed (rare). + assert(false && "multiple dereferencing uses in a single op not supported"); + return failure(); + } + + unsigned memRefOperandPos = usePositions.front(); + + OpBuilder builder(op); + // The following checks if op is dereferencing memref and performs the access + // index rewrites. + auto affMapAccInterface = dyn_cast(op); + if (!affMapAccInterface) { + if (!allowNonDereferencingOps) { + // Failure: memref used in a non-dereferencing context (potentially + // escapes); no replacement in these cases unless allowNonDereferencingOps + // is set. 
+      return failure();
+    }
+    op->setOperand(memRefOperandPos, newMemRef);
+    return success();
+  }
+  // Perform index rewrites for the dereferencing op and then replace the op.
+  NamedAttribute oldMapAttrPair =
+      affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
+  AffineMap oldMap = oldMapAttrPair.getValue().cast<AffineMapAttr>().getValue();
+  unsigned oldMapNumInputs = oldMap.getNumInputs();
+  SmallVector<Value, 4> oldMapOperands(
+      op->operand_begin() + memRefOperandPos + 1,
+      op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
+
+  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
+  SmallVector<Value, 4> oldMemRefOperands;
+  SmallVector<Value, 4> affineApplyOps;
+  oldMemRefOperands.reserve(oldMemRefRank);
+  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
+    for (auto resultExpr : oldMap.getResults()) {
+      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
+                                         oldMap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
+                                                oldMapOperands);
+      oldMemRefOperands.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
+  }
+
+  // Construct new indices as a remap of the old ones if a remapping has been
+  // provided. The indices of a memref come right after it, i.e.,
+  // at position memRefOperandPos + 1.
+  SmallVector<Value, 4> remapOperands;
+  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
+                        symbolOperands.size());
+  remapOperands.append(extraOperands.begin(), extraOperands.end());
+  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
+  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
+
+  SmallVector<Value, 4> remapOutputs;
+  remapOutputs.reserve(oldMemRefRank);
+
+  if (indexRemap &&
+      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
+    // Remapped indices.
+    for (auto resultExpr : indexRemap.getResults()) {
+      auto singleResMap = AffineMap::get(
+          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
+                                                remapOperands);
+      remapOutputs.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    // No remapping specified.
+    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
+  }
+
+  SmallVector<Value, 4> newMapOperands;
+  newMapOperands.reserve(newMemRefRank);
+
+  // Prepend 'extraIndices' in 'newMapOperands'.
+  for (Value extraIndex : extraIndices) {
+    assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
+           "single result ops expected to generate these indices");
+    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
+           "invalid memory op index");
+    newMapOperands.push_back(extraIndex);
+  }
+
+  // Append 'remapOutputs' to 'newMapOperands'.
+  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
+
+  // Create new fully composed AffineMap for new op to be created.
+  assert(newMapOperands.size() == newMemRefRank);
+  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
+  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
+  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
+  newMap = simplifyAffineMap(newMap);
+  canonicalizeMapAndOperands(&newMap, &newMapOperands);
+  // Remove any affine.apply's that became dead as a result of composition.
+  for (Value value : affineApplyOps)
+    if (value.use_empty())
+      value.getDefiningOp()->erase();
+
+  OperationState state(op->getLoc(), op->getName());
+  // Construct the new operation using this memref.
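+  // For illustration (editorial example; the remap below is hypothetical):
+  // with indexRemap = (d0) -> (d0 floordiv 4, d0 mod 4) and no extra
+  // indices/operands, an access
+  //   affine.load %oldMemRef[%i] : memref<16xf32>
+  // is reconstructed at this point as
+  //   affine.load %newMemRef[%i floordiv 4, %i mod 4] : memref<4x4xf32>.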
+  state.operands.reserve(op->getNumOperands() + extraIndices.size());
+  // Insert the non-memref operands.
+  state.operands.append(op->operand_begin(),
+                        op->operand_begin() + memRefOperandPos);
+  // Insert the new memref value.
+  state.operands.push_back(newMemRef);
+
+  // Insert the new memref map operands.
+  state.operands.append(newMapOperands.begin(), newMapOperands.end());
+
+  // Insert the remaining operands unmodified.
+  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
+                            oldMapNumInputs,
+                        op->operand_end());
+
+  // Result types don't change. Both memref's are of the same elemental type.
+  state.types.reserve(op->getNumResults());
+  for (auto result : op->getResults())
+    state.types.push_back(result.getType());
+
+  // Add attribute for 'newMap', other Attributes do not change.
+  auto newMapAttr = AffineMapAttr::get(newMap);
+  for (auto namedAttr : op->getAttrs()) {
+    if (namedAttr.getName() == oldMapAttrPair.getName())
+      state.attributes.push_back({namedAttr.getName(), newMapAttr});
+    else
+      state.attributes.push_back(namedAttr);
+  }
+
+  // Create the new operation.
+  auto *repOp = builder.createOperation(state);
+  op->replaceAllUsesWith(repOp);
+  op->erase();
+
+  return success();
+}
+
+LogicalResult mlir::replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
+    AffineMap indexRemap, ArrayRef<Value> extraOperands,
+    ArrayRef<Value> symbolOperands, Operation *domOpFilter,
+    Operation *postDomOpFilter, bool allowNonDereferencingOps,
+    bool replaceInDeallocOp) {
+  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
+  (void)newMemRefRank; // unused in opt mode
+  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
+  (void)oldMemRefRank;
+  if (indexRemap) {
+    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
+           "symbol operand count mismatch");
+    assert(indexRemap.getNumInputs() ==
+           extraOperands.size() + oldMemRefRank + symbolOperands.size());
+    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
+  } else {
+    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
+  }
+
+  // Assert same elemental type.
+  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
+         newMemRef.getType().cast<MemRefType>().getElementType());
+
+  std::unique_ptr<DominanceInfo> domInfo;
+  std::unique_ptr<PostDominanceInfo> postDomInfo;
+  if (domOpFilter)
+    domInfo = std::make_unique<DominanceInfo>(
+        domOpFilter->getParentOfType<FuncOp>());
+
+  if (postDomOpFilter)
+    postDomInfo = std::make_unique<PostDominanceInfo>(
+        postDomOpFilter->getParentOfType<FuncOp>());
+
+  // Walk all uses of old memref; collect ops to perform replacement. We use a
+  // DenseSet since an operation could potentially have multiple uses of a
+  // memref (although rare), and the replacement later is going to erase ops.
+  DenseSet<Operation *> opsToReplace;
+  for (auto *op : oldMemRef.getUsers()) {
+    // Skip this use if it's not dominated by domOpFilter.
+    if (domOpFilter && !domInfo->dominates(domOpFilter, op))
+      continue;
+
+    // Skip this use if it's not post-dominated by postDomOpFilter.
+    if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
+      continue;
+
+    // Skip dealloc's - no replacement is necessary, and a memref replacement
+    // at other uses doesn't hurt these dealloc's.
+    if (isa<memref::DeallocOp>(op) && !replaceInDeallocOp)
+      continue;
+
+    // Check if the memref was used in a non-dereferencing context. It is fine
+    // for the memref to be used in a non-dereferencing way outside of the
+    // region where this replacement is happening.
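+    // Typical non-dereferencing users (illustrative examples): a
+    // memref.dealloc, or a call that passes the memref along; such ops hold
+    // the memref as an operand without accessing its elements through an
+    // affine map.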
+    if (!isa<AffineMapAccessInterface>(*op)) {
+      if (!allowNonDereferencingOps) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Memref replacement failed: non-dereferencing memref op: \n"
+                   << *op << '\n');
+        return failure();
+      }
+      // Non-dereferencing ops with the MemRefsNormalizable trait are
+      // supported for replacement.
+      if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
+        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
+                                   "memrefs normalizable trait: \n"
+                                << *op << '\n');
+        return failure();
+      }
+    }
+
+    // We'll first collect and then replace --- since replacement erases the op
+    // that has the use, and that op could be postDomFilter or domFilter itself!
+    opsToReplace.insert(op);
+  }
+
+  for (auto *op : opsToReplace) {
+    if (failed(replaceAllMemRefUsesWith(
+            oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
+            symbolOperands, allowNonDereferencingOps)))
+      llvm_unreachable("memref replacement guaranteed to succeed here");
+  }
+
+  return success();
+}
+
+/// Given an operation, inserts one or more single-result affine apply
+/// operations, the results of which are exclusively used by this operation.
+/// The operands of these newly created affine apply ops are guaranteed to be
+/// loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   "compute"(%idx)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "compute"(%idx_)
+///
+/// This allows applying different transformations on send and compute (e.g.,
+/// different shifts/delays).
+///
+/// Returns the affine.apply operations created, in the output argument
+/// `sliceOps`. No slice is created (and `sliceOps` is left empty) either if
+/// none of opInst's operands were the result of an affine.apply (and thus
+/// there was no affine computation slice to create), or if all of the
+/// affine.apply ops supplying operands to opInst had no uses other than
+/// opInst itself.
+void mlir::createAffineComputationSlice(
+    Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
+  // Collect all operands that are results of affine apply ops.
+  SmallVector<Value, 4> subOperands;
+  subOperands.reserve(opInst->getNumOperands());
+  for (auto operand : opInst->getOperands())
+    if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
+      subOperands.push_back(operand);
+
+  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
+  SmallVector<Operation *, 4> affineApplyOps;
+  getReachableAffineApplyOps(subOperands, affineApplyOps);
+  // Skip transforming if there are no affine maps to compose.
+  if (affineApplyOps.empty())
+    return;
+
+  // Check if all uses of the affine apply ops lie only in this op, in
+  // which case there would be nothing to do.
+  bool localized = true;
+  for (auto *op : affineApplyOps) {
+    for (auto result : op->getResults()) {
+      for (auto *user : result.getUsers()) {
+        if (user != opInst) {
+          localized = false;
+          break;
+        }
+      }
+    }
+  }
+  if (localized)
+    return;
+
+  OpBuilder builder(opInst);
+  SmallVector<Value, 4> composedOpOperands(subOperands);
+  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
+  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
+
+  // Create an affine.apply for each of the map results.
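+  // E.g. (illustrative): if the composed map is (d0, d1) -> (d0 + d1, d0 mod 2),
+  // two single-result affine.apply ops are materialized right before opInst:
+  //   %s0 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%a, %b)
+  //   %s1 = affine.apply affine_map<(d0, d1) -> (d0 mod 2)>(%a, %b)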
+  sliceOps->reserve(composedMap.getNumResults());
+  for (auto resultExpr : composedMap.getResults()) {
+    auto singleResMap = AffineMap::get(composedMap.getNumDims(),
+                                       composedMap.getNumSymbols(), resultExpr);
+    sliceOps->push_back(builder.create<AffineApplyOp>(
+        opInst->getLoc(), singleResMap, composedOpOperands));
+  }
+
+  // Construct the new operands that include the results from the composed
+  // affine apply op above instead of the existing ones (subOperands). So, they
+  // differ from opInst's operands only for those operands in 'subOperands',
+  // which are replaced by the corresponding ones from 'sliceOps'.
+  SmallVector<Value, 4> newOperands(opInst->getOperands());
+  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
+    // Replace the subOperands from among the new operands.
+    unsigned j, f;
+    for (j = 0, f = subOperands.size(); j < f; j++) {
+      if (newOperands[i] == subOperands[j])
+        break;
+    }
+    if (j < subOperands.size()) {
+      newOperands[i] = (*sliceOps)[j];
+    }
+  }
+  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
+    opInst->setOperand(idx, newOperands[idx]);
+  }
+}
+
+/// Enum to set patterns of affine expr in a tiled-layout map.
+/// TileFloorDiv: <dim expr> div <tile size>
+/// TileMod: <dim expr> mod <tile size>
+/// TileNone: None of the above
+/// Example:
+///   #tiled_2d_128x256 = affine_map<(d0, d1)
+///                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+///   "d0 div 128" and "d1 div 256" ==> TileFloorDiv
+///   "d0 mod 128" and "d1 mod 256" ==> TileMod
+enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
+
+/// Check if `map` is a tiled layout. In a tiled layout, specific k dimensions
+/// being floordiv'ed by respective tile sizes appear in a mod with the same
+/// tile sizes, and no other expression involves those k dimensions. This
+/// function stores a vector of tuples (`tileSizePos`) including the AffineExpr
+/// for each tile size and the positions of the corresponding `floordiv` and
+/// `mod`. If `map` is not a tiled layout, an empty vector is returned.
+static LogicalResult getTileSizePos(
+    AffineMap map,
+    SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
+  // Create `floordivExprs`, which is a vector of tuples including the LHS and
+  // RHS of each `floordiv` and its position in the `map` output.
+  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
+  //                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
+  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
+  unsigned pos = 0;
+  for (AffineExpr expr : map.getResults()) {
+    if (expr.getKind() == AffineExprKind::FloorDiv) {
+      AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+      if (binaryExpr.getRHS().isa<AffineConstantExpr>())
+        floordivExprs.emplace_back(
+            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
+    }
+    pos++;
+  }
+  // Not a tiled layout if `floordivExprs` is empty.
+  if (floordivExprs.empty()) {
+    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
+    return success();
+  }
+
+  // Check if the LHS of each `floordiv` is used in the LHS of a `mod`. If not,
+  // `map` is not a tiled layout.
+  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
+    AffineExpr floordivExprLHS = std::get<0>(fexpr);
+    AffineExpr floordivExprRHS = std::get<1>(fexpr);
+    unsigned floordivPos = std::get<2>(fexpr);
+
+    // Walk each affine expr of the `map` output except `fexpr`, and check if
+    // the LHS and RHS of `fexpr` are used in the LHS and RHS of a `mod`. If
+    // the LHS of `fexpr` is used in any other expr, the map is not a tiled
+    // layout.
+    // Examples of non-tiled layouts:
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256,
+    //                               d2 mod 256)>
+    bool found = false;
+    pos = 0;
+    for (AffineExpr expr : map.getResults()) {
+      bool notTiled = false;
+      if (pos != floordivPos) {
+        expr.walk([&](AffineExpr e) {
+          if (e == floordivExprLHS) {
+            if (expr.getKind() == AffineExprKind::Mod) {
+              AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+              // Check if the LHS and RHS of the `mod` are the same as those of
+              // the `floordiv`.
+              if (floordivExprLHS == binaryExpr.getLHS() &&
+                  floordivExprRHS == binaryExpr.getRHS()) {
+                // Save the tile size (RHS of the `mod`) and the positions of
+                // the `floordiv` and `mod` if no matching `mod` has been found
+                // yet.
+                if (!found) {
+                  tileSizePos.emplace_back(
+                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
+                  found = true;
+                } else {
+                  // Not a tiled layout: multiple `mod`s with the same LHS.
+                  // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
+                  //                                  d2 mod 256, d2 mod 256)>
+                  notTiled = true;
+                }
+              } else {
+                // Not a tiled layout: the RHS of the `mod` differs from that
+                // of the `floordiv`.
+                // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
+                //                                  d2 mod 128)>
+                notTiled = true;
+              }
+            } else {
+              // Not a tiled layout: the LHS is the same, but the expression is
+              // not a `mod`.
+              // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
+              //                                  d2 floordiv 256)>
+              notTiled = true;
+            }
+          }
+        });
+      }
+      if (notTiled) {
+        tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
+        return success();
+      }
+      pos++;
+    }
+  }
+  return success();
+}
+
+/// Check if the `dim` dimension of a memrefType with `layoutMap` becomes
+/// dynamic after normalization. Dimensions whose map output involves a dynamic
+/// input dimension become dynamic dimensions. Returns true if `dim` is such a
+/// dynamic dimension.
+///
+/// Example:
+/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///
+/// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output
+/// are dynamic: memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
+static bool
+isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
+                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
+                             MLIRContext *context) {
+  bool isDynamicDim = false;
+  AffineExpr expr = layoutMap.getResults()[dim];
+  // Check if the affine expr of this dimension includes a dynamic dimension of
+  // the input memrefType.
+  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
+    if (e.isa<AffineDimExpr>()) {
+      for (unsigned dm : inMemrefTypeDynDims) {
+        if (e == getAffineDimExpr(dm, context)) {
+          isDynamicDim = true;
+        }
+      }
+    }
+  });
+  return isDynamicDim;
+}
+
+/// Create an affine expr to calculate the dimension size for a tiled-layout
+/// map.
+static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
+                                                  TileExprPattern pat) {
+  // Create the map output for the patterns:
+  //   "floordiv <tile size>" ==> "ceildiv <tile size>"
+  //   "mod <tile size>" ==> "<tile size>"
+  AffineExpr newMapOutput;
+  AffineBinaryOpExpr binaryExpr = nullptr;
+  switch (pat) {
+  case TileExprPattern::TileMod:
+    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = binaryExpr.getRHS();
+    break;
+  case TileExprPattern::TileFloorDiv:
+    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = getAffineBinaryOpExpr(
+        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
+    break;
+  default:
+    newMapOutput = oldMapOutput;
+  }
+  return newMapOutput;
+}
+
+/// Create new maps to calculate each dimension size of `newMemRefType`, and
+/// create `newDynamicSizes` from them by using AffineApplyOp.
+///
+/// Steps for normalizing dynamic memrefs for a tiled layout map
+/// Example:
+///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///    %0 = dim %arg0, %c1 : memref<4x?xf32>
+///    %1 = alloc(%0) : memref<4x?xf32, #map0>
+///
+/// (Before this function)
+/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
+///    a single layout map is supported.
+///
+/// 2. Create the normalized memrefType using `isNormalizedMemRefDynamicDim()`.
+///    It is memref<4x?x?xf32> in the above example.
+///
+/// (In this function)
+/// 3. Create new maps to calculate each dimension of the normalized
+///    memrefType using `createDimSizeExprForTiledLayout()`. In a tiled layout,
+///    the dimension size can be calculated by replacing
+///    "floordiv <tile size>" with "ceildiv <tile size>" and
+///    "mod <tile size>" with "<tile size>".
+///    - New maps in the above example:
+///      #map0 = affine_map<(d0, d1) -> (d0)>
+///      #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
+///      #map2 = affine_map<(d0, d1) -> (32)>
+///
+/// 4. Create AffineApplyOps to apply the new maps. The outputs of the
+///    AffineApplyOps are used in the dynamicSizes of the new AllocOp.
+///    %0 = dim %arg0, %c1 : memref<4x?xf32>
+///    %c4 = arith.constant 4 : index
+///    %1 = affine.apply #map1(%c4, %0)
+///    %2 = affine.apply #map2(%c4, %0)
+static void createNewDynamicSizes(MemRefType oldMemRefType,
+                                  MemRefType newMemRefType, AffineMap map,
+                                  memref::AllocOp *allocOp, OpBuilder b,
+                                  SmallVectorImpl<Value> &newDynamicSizes) {
+  // Create the new inputs for the AffineApplyOps.
+  SmallVector<Value, 4> inAffineApply;
+  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
+  unsigned dynIdx = 0;
+  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
+    if (oldMemRefShape[d] < 0) {
+      // Use the dynamicSizes of the allocOp for dynamic dimensions.
+      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
+      dynIdx++;
+    } else {
+      // Create a ConstantOp for each static dimension.
+      Attribute constantAttr =
+          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
+      inAffineApply.emplace_back(
+          b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
+    }
+  }
+
+  // Create a new map to calculate each dimension size of the new memref for
+  // each original map output, but only for the dynamic dimensions of
+  // `newMemRefType`.
+  unsigned newDimIdx = 0;
+  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(map, tileSizePos);
+  for (AffineExpr expr : map.getResults()) {
+    if (newMemRefShape[newDimIdx] < 0) {
+      // Create new maps to calculate each dimension size of the new memref.
+      enum TileExprPattern pat = TileExprPattern::TileNone;
+      for (auto pos : tileSizePos) {
+        if (newDimIdx == std::get<1>(pos))
+          pat = TileExprPattern::TileFloorDiv;
+        else if (newDimIdx == std::get<2>(pos))
+          pat = TileExprPattern::TileMod;
+      }
+      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
+      AffineMap newMap =
+          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
+      Value affineApp =
+          b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
+      newDynamicSizes.emplace_back(affineApp);
+    }
+    newDimIdx++;
+  }
+}
+
+// TODO: Currently works for static memrefs with a single layout map.
+LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
+  MemRefType memrefType = allocOp->getType();
+  OpBuilder b(*allocOp);
+
+  // Fetch a new memref type after normalizing the old memref to have an
+  // identity map layout.
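+  // E.g. (illustrative):
+  //   memref<4x8xf32, affine_map<(d0, d1) -> (d0, d1 floordiv 2, d1 mod 2)>>
+  // normalizes to memref<4x4x2xf32> with the identity layout.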
+  MemRefType newMemRefType =
+      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
+  if (newMemRefType == memrefType)
+    // Either memrefType already had an identity map or the map couldn't be
+    // transformed to an identity map.
+    return failure();
+
+  Value oldMemRef = allocOp->getResult();
+
+  SmallVector<Value, 4> symbolOperands(allocOp->symbolOperands());
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+  memref::AllocOp newAlloc;
+  // Check if `layoutMap` is a tiled layout. Only a single layout map is
+  // supported for normalizing dynamic memrefs.
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
+    MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>();
+    SmallVector<Value, 4> newDynamicSizes;
+    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
+                          newDynamicSizes);
+    // Add the new dynamic sizes in the new AllocOp.
+    newAlloc =
+        b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
+                                  newDynamicSizes, allocOp->alignmentAttr());
+  } else {
+    newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
+                                         allocOp->alignmentAttr());
+  }
+  // Replace all uses of the old memref.
+  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
+                                      /*extraIndices=*/{},
+                                      /*indexRemap=*/layoutMap,
+                                      /*extraOperands=*/{},
+                                      /*symbolOperands=*/symbolOperands,
+                                      /*domOpFilter=*/nullptr,
+                                      /*postDomOpFilter=*/nullptr,
+                                      /*allowNonDereferencingOps=*/true))) {
+    // If it failed (due to escapes for example), bail out.
+    newAlloc.erase();
+    return failure();
+  }
+  // Replace any uses of the original alloc op and erase it. All remaining uses
+  // have to be dealloc's; RAMUW above would've failed otherwise.
+  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
+    return isa<memref::DeallocOp>(op);
+  }));
+  oldMemRef.replaceAllUsesWith(newAlloc);
+  allocOp->erase();
+  return success();
+}
+
+MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
+                                     unsigned numSymbolicOperands) {
+  unsigned rank = memrefType.getRank();
+  if (rank == 0)
+    return memrefType;
+
+  if (memrefType.getLayout().isIdentity()) {
+    // Either no map is associated with this memref or this memref has
+    // a trivial (identity) map.
+    return memrefType;
+  }
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+
+  // We don't do any checks for one-to-one'ness; we assume that it is
+  // one-to-one.
+
+  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
+  // for now.
+  // TODO: Normalize the other types of dynamic memrefs.
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
+    return memrefType;
+
+  // We have a single map that is not an identity map. Create a new memref
+  // with the right shape and an identity layout map.
+  ArrayRef<int64_t> shape = memrefType.getShape();
+  // FlatAffineConstraints may later on use symbolicOperands.
+  FlatAffineConstraints fac(rank, numSymbolicOperands);
+  SmallVector<unsigned, 4> memrefTypeDynDims;
+  for (unsigned d = 0; d < rank; ++d) {
+    // Use the constraint system only for static dimensions.
+    if (shape[d] > 0) {
+      fac.addBound(FlatAffineConstraints::LB, d, 0);
+      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
+    } else {
+      memrefTypeDynDims.emplace_back(d);
+    }
+  }
+  // We compose this map with the original index (logical) space to derive
+  // the upper bounds for the new index space.
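+  // For instance (illustrative): composing
+  // (d0, d1) -> (d0, d1 floordiv 2, d1 mod 2) with 0 <= d0 <= 3 and
+  // 0 <= d1 <= 7 yields upper bounds 3, 3, and 1 for the three new
+  // dimensions, i.e. the normalized shape 4x4x2.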
+  unsigned newRank = layoutMap.getNumResults();
+  if (failed(fac.composeMatchingMap(layoutMap)))
+    return memrefType;
+  // TODO: Handle semi-affine maps.
+  // Project out the old data dimensions.
+  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
+  SmallVector<int64_t, 4> newShape(newRank);
+  for (unsigned d = 0; d < newRank; ++d) {
+    // Check if each dimension of the normalized memrefType is dynamic.
+    bool isDynDim = isNormalizedMemRefDynamicDim(
+        d, layoutMap, memrefTypeDynDims, b.getContext());
+    if (isDynDim) {
+      newShape[d] = -1;
+    } else {
+      // The lower bound for the shape is always zero.
+      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
+      // For a static memref and an affine map with no symbols, this is
+      // always bounded.
+      assert(ubConst.hasValue() && "should always have an upper bound");
+      if (ubConst.getValue() < 0)
+        // This is due to an invalid map that maps to a negative space.
+        return memrefType;
+      // If a dimension of the new memrefType is dynamic, its value is -1.
+      newShape[d] = ubConst.getValue() + 1;
+    }
+  }
+
+  // Create the new memref type after trivializing the old layout map.
+  MemRefType newMemRefType =
+      MemRefType::Builder(memrefType)
+          .setShape(newShape)
+          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
+
+  return newMemRefType;
+}
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -19,7 +20,6 @@
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 
 using namespace mlir;
 using namespace mlir::gpu;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
@@ -13,6 +13,7 @@
 
 #include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h"
 
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
 #include "mlir/Dialect/SCF/Transforms.h"
@@ -20,7 +21,6 @@
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
@@ -24,7 +25,6 @@
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
-#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -15,6 +15,7 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/Analysis/AffineStructures.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" @@ -27,7 +28,6 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/LoopUtils.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -16,6 +16,8 @@ #include "PassDetail.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/LoopUtils.h" +#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Hoisting.h" @@ -29,9 +31,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/LoopUtils.h" #include "mlir/Transforms/Passes.h" -#include "mlir/Transforms/Utils.h" using namespace mlir; using namespace mlir::vector; diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Affine/Analysis/AffineStructures.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/Affine/LoopUtils.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" @@ -30,7 +31,6 @@ #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" #include "mlir/Pass/Pass.h" -#include "mlir/Transforms/LoopUtils.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" diff --git a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp --- a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp @@ -13,9 +13,9 @@ #include "PassDetail.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Transforms/Passes.h" -#include "mlir/Transforms/Utils.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/Debug.h" diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -6,6 +6,7 @@ LoopPipelining.cpp LoopRangeFolding.cpp LoopSpecialization.cpp + ParallelLoopCollapsing.cpp ParallelLoopFusion.cpp ParallelLoopTiling.cpp StructuralTypeConversions.cpp diff --git 
rename from mlir/lib/Transforms/ParallelLoopCollapsing.cpp
rename to mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
--- a/mlir/lib/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -20,7 +20,7 @@
 
 namespace {
 struct ParallelLoopCollapsing
-    : public ParallelLoopCollapsingBase<ParallelLoopCollapsing> {
+    : public SCFParallelLoopCollapsingBase<ParallelLoopCollapsing> {
   void runOnOperation() override {
     Operation *module = getOperation();
 
diff --git a/mlir/lib/Interfaces/LoopLikeInterface.cpp b/mlir/lib/Interfaces/LoopLikeInterface.cpp
--- a/mlir/lib/Interfaces/LoopLikeInterface.cpp
+++ b/mlir/lib/Interfaces/LoopLikeInterface.cpp
@@ -7,12 +7,95 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
 
 using namespace mlir;
 
+#define DEBUG_TYPE "loop-like"
+
 //===----------------------------------------------------------------------===//
 // LoopLike Interfaces
 //===----------------------------------------------------------------------===//
 
 /// Include the definitions of the loop-like interfaces.
 #include "mlir/Interfaces/LoopLikeInterface.cpp.inc"
+
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+// Checks whether the given op can be hoisted by checking that
+// - the op and any of its contained operations do not depend on SSA values
+//   defined inside of the loop (by means of calling definedOutside).
+// - the op has no side effects.
+static bool canBeHoisted(Operation *op,
+                         function_ref<bool(Value)> definedOutside) {
+  // Check that dependencies are defined outside of loop.
+  if (!llvm::all_of(op->getOperands(), definedOutside))
+    return false;
+  // Check whether this op is side-effect free. If we already know that there
+  // can be no side-effects because the surrounding op has claimed so, we can
+  // (and have to) skip this step.
+  if (auto memInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
+    if (!memInterface.hasNoEffect())
+      return false;
+    // If the operation doesn't have side effects and it doesn't recursively
+    // have side effects, it can always be hoisted.
+    if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>())
+      return true;
+
+    // Otherwise, if the operation doesn't provide the memory effect interface
+    // and it doesn't have recursive side effects we treat it conservatively as
+    // side-effecting.
+  } else if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>()) {
+    return false;
+  }
+
+  // Recurse into the regions for this op and check whether the contained ops
+  // can be hoisted.
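+  // E.g. (illustrative): an scf.if whose blocks contain only side-effect-free
+  // arith ops on loop-invariant values is hoistable as a whole; a single
+  // memref.store anywhere inside one of its regions makes it non-hoistable.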
+ for (auto ®ion : op->getRegions()) { + for (auto &block : region) { + for (auto &innerOp : block) + if (!canBeHoisted(&innerOp, definedOutside)) + return false; + } + } + return true; +} + +LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) { + auto &loopBody = looplike.getLoopBody(); + + // We use two collections here as we need to preserve the order for insertion + // and this is easiest. + SmallPtrSet willBeMovedSet; + SmallVector opsToMove; + + // Helper to check whether an operation is loop invariant wrt. SSA properties. + auto isDefinedOutsideOfBody = [&](Value value) { + auto *definingOp = value.getDefiningOp(); + return (definingOp && !!willBeMovedSet.count(definingOp)) || + looplike.isDefinedOutsideOfLoop(value); + }; + + // Do not use walk here, as we do not want to go into nested regions and hoist + // operations from there. These regions might have semantics unknown to this + // rewriting. If the nested regions are loops, they will have been processed. + for (auto &block : loopBody) { + for (auto &op : block.without_terminator()) { + if (canBeHoisted(&op, isDefinedOutsideOfBody)) { + opsToMove.push_back(&op); + willBeMovedSet.insert(&op); + } + } + } + + // For all instructions that we found to be invariant, move outside of the + // loop. + LogicalResult result = looplike.moveOutOfLoop(opsToMove); + LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n")); + return result; +} diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -5,12 +5,8 @@ CSE.cpp Inliner.cpp LocationSnapshot.cpp - LoopCoalescing.cpp - LoopFusion.cpp LoopInvariantCodeMotion.cpp OpStats.cpp - ParallelLoopCollapsing.cpp - PipelineDataTransfer.cpp SCCP.cpp StripDebugInfo.cpp SymbolDCE.cpp @@ -20,18 +16,13 @@ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms DEPENDS - MLIRStandardOpsIncGen MLIRTransformsPassIncGen LINK_LIBS PUBLIC - MLIRAffine MLIRAnalysis MLIRCopyOpInterface MLIRLoopLikeInterface - MLIRMemRef - MLIRSCF MLIRPass MLIRSupport MLIRTransformUtils - MLIRVector ) diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp --- a/mlir/lib/Transforms/CSE.cpp +++ b/mlir/lib/Transforms/CSE.cpp @@ -15,7 +15,6 @@ #include "mlir/IR/Dominance.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" -#include "mlir/Transforms/Utils.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp --- a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp +++ b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp @@ -11,13 +11,10 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" -#include "mlir/Transforms/Passes.h" - #include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" #include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Transforms/LoopUtils.h" +#include "mlir/Transforms/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -34,80 +31,6 @@ }; } // namespace -// Checks whether the given op can be hoisted by checking that -// - the op and any of its contained operations do not depend on SSA values -// defined inside of the loop (by means of calling definedOutside). -// - the op has no side-effects. 
If sideEffecting is Never, sideeffects of this -// op and its nested ops are ignored. -static bool canBeHoisted(Operation *op, - function_ref definedOutside) { - // Check that dependencies are defined outside of loop. - if (!llvm::all_of(op->getOperands(), definedOutside)) - return false; - // Check whether this op is side-effect free. If we already know that there - // can be no side-effects because the surrounding op has claimed so, we can - // (and have to) skip this step. - if (auto memInterface = dyn_cast(op)) { - if (!memInterface.hasNoEffect()) - return false; - // If the operation doesn't have side effects and it doesn't recursively - // have side effects, it can always be hoisted. - if (!op->hasTrait()) - return true; - - // Otherwise, if the operation doesn't provide the memory effect interface - // and it doesn't have recursive side effects we treat it conservatively as - // side-effecting. - } else if (!op->hasTrait()) { - return false; - } - - // Recurse into the regions for this op and check whether the contained ops - // can be hoisted. - for (auto ®ion : op->getRegions()) { - for (auto &block : region) { - for (auto &innerOp : block) - if (!canBeHoisted(&innerOp, definedOutside)) - return false; - } - } - return true; -} - -LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) { - auto &loopBody = looplike.getLoopBody(); - - // We use two collections here as we need to preserve the order for insertion - // and this is easiest. - SmallPtrSet willBeMovedSet; - SmallVector opsToMove; - - // Helper to check whether an operation is loop invariant wrt. SSA properties. - auto isDefinedOutsideOfBody = [&](Value value) { - auto *definingOp = value.getDefiningOp(); - return (definingOp && !!willBeMovedSet.count(definingOp)) || - looplike.isDefinedOutsideOfLoop(value); - }; - - // Do not use walk here, as we do not want to go into nested regions and hoist - // operations from there. These regions might have semantics unknown to this - // rewriting. If the nested regions are loops, they will have been processed. - for (auto &block : loopBody) { - for (auto &op : block.without_terminator()) { - if (canBeHoisted(&op, isDefinedOutsideOfBody)) { - opsToMove.push_back(&op); - willBeMovedSet.insert(&op); - } - } - } - - // For all instructions that we found to be invariant, move outside of the - // loop. - auto result = looplike.moveOutOfLoop(opsToMove); - LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n")); - return result; -} - void LoopInvariantCodeMotion::runOnOperation() { // Walk through all loops in a function in innermost-loop-first order. 
This // way, we first LICM from the inner loop, and place the ops in diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h --- a/mlir/lib/Transforms/PassDetail.h +++ b/mlir/lib/Transforms/PassDetail.h @@ -13,23 +13,8 @@ #include "mlir/Transforms/Passes.h" namespace mlir { -class AffineDialect; - -// Forward declaration from Dialect.h -template -void registerDialect(DialectRegistry ®istry); - -namespace arith { -class ArithmeticDialect; -} // namespace arith - -namespace memref { -class MemRefDialect; -} // namespace memref - #define GEN_PASS_CLASSES #include "mlir/Transforms/Passes.h.inc" - } // namespace mlir #endif // TRANSFORMS_PASSDETAIL_H_ diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -3,25 +3,12 @@ FoldUtils.cpp GreedyPatternRewriteDriver.cpp InliningUtils.cpp - LoopFusionUtils.cpp - LoopUtils.cpp RegionUtils.cpp - Utils.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms - DEPENDS - MLIRStandardOpsIncGen - LINK_LIBS PUBLIC - MLIRAffine - MLIRArithmetic MLIRAnalysis - MLIRAffineAnalysis - MLIRMemRef - MLIRSCF - MLIRPass MLIRRewrite - MLIRStandard ) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -13,7 +13,6 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/FunctionInterfaces.h" #include "mlir/Rewrite/PatternApplicator.h" -#include "mlir/Transforms/Utils.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp deleted file mode 100644 --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ /dev/null @@ -1,767 +0,0 @@ -//===- Utils.cpp ---- Misc utilities for code and data transformation -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements miscellaneous transformation routines for non-loop IR -// structures. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Transforms/Utils.h" -#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" -#include "mlir/Dialect/Affine/Analysis/AffineStructures.h" -#include "mlir/Dialect/Affine/Analysis/Utils.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/Dominance.h" -#include "mlir/Support/MathExtras.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/TypeSwitch.h" - -#define DEBUG_TYPE "transforms-utils" - -using namespace mlir; - -// Perform the replacement in `op`. 
-LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, - Operation *op, - ArrayRef extraIndices, - AffineMap indexRemap, - ArrayRef extraOperands, - ArrayRef symbolOperands, - bool allowNonDereferencingOps) { - unsigned newMemRefRank = newMemRef.getType().cast().getRank(); - (void)newMemRefRank; // unused in opt mode - unsigned oldMemRefRank = oldMemRef.getType().cast().getRank(); - (void)oldMemRefRank; // unused in opt mode - if (indexRemap) { - assert(indexRemap.getNumSymbols() == symbolOperands.size() && - "symbolic operand count mismatch"); - assert(indexRemap.getNumInputs() == - extraOperands.size() + oldMemRefRank + symbolOperands.size()); - assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank); - } else { - assert(oldMemRefRank + extraIndices.size() == newMemRefRank); - } - - // Assert same elemental type. - assert(oldMemRef.getType().cast().getElementType() == - newMemRef.getType().cast().getElementType()); - - SmallVector usePositions; - for (const auto &opEntry : llvm::enumerate(op->getOperands())) { - if (opEntry.value() == oldMemRef) - usePositions.push_back(opEntry.index()); - } - - // If memref doesn't appear, nothing to do. - if (usePositions.empty()) - return success(); - - if (usePositions.size() > 1) { - // TODO: extend it for this case when needed (rare). - assert(false && "multiple dereferencing uses in a single op not supported"); - return failure(); - } - - unsigned memRefOperandPos = usePositions.front(); - - OpBuilder builder(op); - // The following checks if op is dereferencing memref and performs the access - // index rewrites. - auto affMapAccInterface = dyn_cast(op); - if (!affMapAccInterface) { - if (!allowNonDereferencingOps) { - // Failure: memref used in a non-dereferencing context (potentially - // escapes); no replacement in these cases unless allowNonDereferencingOps - // is set. - return failure(); - } - op->setOperand(memRefOperandPos, newMemRef); - return success(); - } - // Perform index rewrites for the dereferencing op and then replace the op - NamedAttribute oldMapAttrPair = - affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef); - AffineMap oldMap = oldMapAttrPair.getValue().cast().getValue(); - unsigned oldMapNumInputs = oldMap.getNumInputs(); - SmallVector oldMapOperands( - op->operand_begin() + memRefOperandPos + 1, - op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs); - - // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'. - SmallVector oldMemRefOperands; - SmallVector affineApplyOps; - oldMemRefOperands.reserve(oldMemRefRank); - if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) { - for (auto resultExpr : oldMap.getResults()) { - auto singleResMap = AffineMap::get(oldMap.getNumDims(), - oldMap.getNumSymbols(), resultExpr); - auto afOp = builder.create(op->getLoc(), singleResMap, - oldMapOperands); - oldMemRefOperands.push_back(afOp); - affineApplyOps.push_back(afOp); - } - } else { - oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end()); - } - - // Construct new indices as a remap of the old ones if a remapping has been - // provided. The indices of a memref come right after it, i.e., - // at position memRefOperandPos + 1. 
- SmallVector remapOperands; - remapOperands.reserve(extraOperands.size() + oldMemRefRank + - symbolOperands.size()); - remapOperands.append(extraOperands.begin(), extraOperands.end()); - remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end()); - remapOperands.append(symbolOperands.begin(), symbolOperands.end()); - - SmallVector remapOutputs; - remapOutputs.reserve(oldMemRefRank); - - if (indexRemap && - indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) { - // Remapped indices. - for (auto resultExpr : indexRemap.getResults()) { - auto singleResMap = AffineMap::get( - indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr); - auto afOp = builder.create(op->getLoc(), singleResMap, - remapOperands); - remapOutputs.push_back(afOp); - affineApplyOps.push_back(afOp); - } - } else { - // No remapping specified. - remapOutputs.assign(remapOperands.begin(), remapOperands.end()); - } - - SmallVector newMapOperands; - newMapOperands.reserve(newMemRefRank); - - // Prepend 'extraIndices' in 'newMapOperands'. - for (Value extraIndex : extraIndices) { - assert(extraIndex.getDefiningOp()->getNumResults() == 1 && - "single result op's expected to generate these indices"); - assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) && - "invalid memory op index"); - newMapOperands.push_back(extraIndex); - } - - // Append 'remapOutputs' to 'newMapOperands'. - newMapOperands.append(remapOutputs.begin(), remapOutputs.end()); - - // Create new fully composed AffineMap for new op to be created. - assert(newMapOperands.size() == newMemRefRank); - auto newMap = builder.getMultiDimIdentityMap(newMemRefRank); - // TODO: Avoid creating/deleting temporary AffineApplyOps here. - fullyComposeAffineMapAndOperands(&newMap, &newMapOperands); - newMap = simplifyAffineMap(newMap); - canonicalizeMapAndOperands(&newMap, &newMapOperands); - // Remove any affine.apply's that became dead as a result of composition. - for (Value value : affineApplyOps) - if (value.use_empty()) - value.getDefiningOp()->erase(); - - OperationState state(op->getLoc(), op->getName()); - // Construct the new operation using this memref. - state.operands.reserve(op->getNumOperands() + extraIndices.size()); - // Insert the non-memref operands. - state.operands.append(op->operand_begin(), - op->operand_begin() + memRefOperandPos); - // Insert the new memref value. - state.operands.push_back(newMemRef); - - // Insert the new memref map operands. - state.operands.append(newMapOperands.begin(), newMapOperands.end()); - - // Insert the remaining operands unmodified. - state.operands.append(op->operand_begin() + memRefOperandPos + 1 + - oldMapNumInputs, - op->operand_end()); - - // Result types don't change. Both memref's are of the same elemental type. - state.types.reserve(op->getNumResults()); - for (auto result : op->getResults()) - state.types.push_back(result.getType()); - - // Add attribute for 'newMap', other Attributes do not change. - auto newMapAttr = AffineMapAttr::get(newMap); - for (auto namedAttr : op->getAttrs()) { - if (namedAttr.getName() == oldMapAttrPair.getName()) - state.attributes.push_back({namedAttr.getName(), newMapAttr}); - else - state.attributes.push_back(namedAttr); - } - - // Create the new operation. 
- auto *repOp = builder.createOperation(state); - op->replaceAllUsesWith(repOp); - op->erase(); - - return success(); -} - -LogicalResult mlir::replaceAllMemRefUsesWith( - Value oldMemRef, Value newMemRef, ArrayRef extraIndices, - AffineMap indexRemap, ArrayRef extraOperands, - ArrayRef symbolOperands, Operation *domOpFilter, - Operation *postDomOpFilter, bool allowNonDereferencingOps, - bool replaceInDeallocOp) { - unsigned newMemRefRank = newMemRef.getType().cast().getRank(); - (void)newMemRefRank; // unused in opt mode - unsigned oldMemRefRank = oldMemRef.getType().cast().getRank(); - (void)oldMemRefRank; - if (indexRemap) { - assert(indexRemap.getNumSymbols() == symbolOperands.size() && - "symbol operand count mismatch"); - assert(indexRemap.getNumInputs() == - extraOperands.size() + oldMemRefRank + symbolOperands.size()); - assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank); - } else { - assert(oldMemRefRank + extraIndices.size() == newMemRefRank); - } - - // Assert same elemental type. - assert(oldMemRef.getType().cast().getElementType() == - newMemRef.getType().cast().getElementType()); - - std::unique_ptr domInfo; - std::unique_ptr postDomInfo; - if (domOpFilter) - domInfo = - std::make_unique(domOpFilter->getParentOfType()); - - if (postDomOpFilter) - postDomInfo = std::make_unique( - postDomOpFilter->getParentOfType()); - - // Walk all uses of old memref; collect ops to perform replacement. We use a - // DenseSet since an operation could potentially have multiple uses of a - // memref (although rare), and the replacement later is going to erase ops. - DenseSet opsToReplace; - for (auto *op : oldMemRef.getUsers()) { - // Skip this use if it's not dominated by domOpFilter. - if (domOpFilter && !domInfo->dominates(domOpFilter, op)) - continue; - - // Skip this use if it's not post-dominated by postDomOpFilter. - if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op)) - continue; - - // Skip dealloc's - no replacement is necessary, and a memref replacement - // at other uses doesn't hurt these dealloc's. - if (isa(op) && !replaceInDeallocOp) - continue; - - // Check if the memref was used in a non-dereferencing context. It is fine - // for the memref to be used in a non-dereferencing way outside of the - // region where this replacement is happening. - if (!isa(*op)) { - if (!allowNonDereferencingOps) { - LLVM_DEBUG(llvm::dbgs() - << "Memref replacement failed: non-deferencing memref op: \n" - << *op << '\n'); - return failure(); - } - // Non-dereferencing ops with the MemRefsNormalizable trait are - // supported for replacement. - if (!op->hasTrait()) { - LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a " - "memrefs normalizable trait: \n" - << *op << '\n'); - return failure(); - } - } - - // We'll first collect and then replace --- since replacement erases the op - // that has the use, and that op could be postDomFilter or domFilter itself! - opsToReplace.insert(op); - } - - for (auto *op : opsToReplace) { - if (failed(replaceAllMemRefUsesWith( - oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands, - symbolOperands, allowNonDereferencingOps))) - llvm_unreachable("memref replacement guaranteed to succeed here"); - } - - return success(); -} - -/// Given an operation, inserts one or more single result affine -/// apply operations, results of which are exclusively used by this operation -/// operation. 
The operands of these newly created affine apply ops are -/// guaranteed to be loop iterators or terminal symbols of a function. -/// -/// Before -/// -/// affine.for %i = 0 to #map(%N) -/// %idx = affine.apply (d0) -> (d0 mod 2) (%i) -/// "send"(%idx, %A, ...) -/// "compute"(%idx) -/// -/// After -/// -/// affine.for %i = 0 to #map(%N) -/// %idx = affine.apply (d0) -> (d0 mod 2) (%i) -/// "send"(%idx, %A, ...) -/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) -/// "compute"(%idx_) -/// -/// This allows applying different transformations on send and compute (for eg. -/// different shifts/delays). -/// -/// Returns nullptr either if none of opInst's operands were the result of an -/// affine.apply and thus there was no affine computation slice to create, or if -/// all the affine.apply op's supplying operands to this opInst did not have any -/// uses besides this opInst; otherwise returns the list of affine.apply -/// operations created in output argument `sliceOps`. -void mlir::createAffineComputationSlice( - Operation *opInst, SmallVectorImpl *sliceOps) { - // Collect all operands that are results of affine apply ops. - SmallVector subOperands; - subOperands.reserve(opInst->getNumOperands()); - for (auto operand : opInst->getOperands()) - if (isa_and_nonnull(operand.getDefiningOp())) - subOperands.push_back(operand); - - // Gather sequence of AffineApplyOps reachable from 'subOperands'. - SmallVector affineApplyOps; - getReachableAffineApplyOps(subOperands, affineApplyOps); - // Skip transforming if there are no affine maps to compose. - if (affineApplyOps.empty()) - return; - - // Check if all uses of the affine apply op's lie only in this op op, in - // which case there would be nothing to do. - bool localized = true; - for (auto *op : affineApplyOps) { - for (auto result : op->getResults()) { - for (auto *user : result.getUsers()) { - if (user != opInst) { - localized = false; - break; - } - } - } - } - if (localized) - return; - - OpBuilder builder(opInst); - SmallVector composedOpOperands(subOperands); - auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size()); - fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands); - - // Create an affine.apply for each of the map results. - sliceOps->reserve(composedMap.getNumResults()); - for (auto resultExpr : composedMap.getResults()) { - auto singleResMap = AffineMap::get(composedMap.getNumDims(), - composedMap.getNumSymbols(), resultExpr); - sliceOps->push_back(builder.create( - opInst->getLoc(), singleResMap, composedOpOperands)); - } - - // Construct the new operands that include the results from the composed - // affine apply op above instead of existing ones (subOperands). So, they - // differ from opInst's operands only for those operands in 'subOperands', for - // which they will be replaced by the corresponding one from 'sliceOps'. - SmallVector newOperands(opInst->getOperands()); - for (unsigned i = 0, e = newOperands.size(); i < e; i++) { - // Replace the subOperands from among the new operands. - unsigned j, f; - for (j = 0, f = subOperands.size(); j < f; j++) { - if (newOperands[i] == subOperands[j]) - break; - } - if (j < subOperands.size()) { - newOperands[i] = (*sliceOps)[j]; - } - } - for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) { - opInst->setOperand(idx, newOperands[idx]); - } -} - -/// Enum to set patterns of affine expr in tiled-layout map. 
-/// Enum to set patterns of affine expr in tiled-layout map.
-/// TileFloorDiv: <dim expr> div <tile size>
-/// TileMod: <dim expr> mod <tile size>
-/// TileNone: None of the above
-/// Example:
-/// #tiled_2d_128x256 = affine_map<(d0, d1)
-///                 -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
-/// "d0 mod 128" and "d1 mod 256" ==> TileMod
-enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
-
-/// Check if `map` is a tiled layout. In a tiled layout, specific k dimensions
-/// being floordiv'ed by respective tile sizes appear in a mod with the same
-/// tile sizes, and no other expression involves those k dimensions. This
-/// function stores a vector of tuples (`tileSizePos`) holding the AffineExpr
-/// for the tile size and the positions of the corresponding `floordiv` and
-/// `mod`. If `map` is not a tiled layout, an empty vector is returned.
-static LogicalResult getTileSizePos(
-    AffineMap map,
-    SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
-  // Create `floordivExprs`, a vector of tuples holding the LHS and RHS of each
-  // `floordiv` and its position in the `map` output.
-  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
-  //   -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
-  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
-  unsigned pos = 0;
-  for (AffineExpr expr : map.getResults()) {
-    if (expr.getKind() == AffineExprKind::FloorDiv) {
-      AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
-      if (binaryExpr.getRHS().isa<AffineConstantExpr>())
-        floordivExprs.emplace_back(
-            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
-    }
-    pos++;
-  }
-  // Not a tiled layout if `floordivExprs` is empty.
-  if (floordivExprs.empty()) {
-    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
-    return success();
-  }
-
-  // Check if the LHS of each `floordiv` is used in the LHS of a `mod`. If not,
-  // `map` is not a tiled layout.
-  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
-    AffineExpr floordivExprLHS = std::get<0>(fexpr);
-    AffineExpr floordivExprRHS = std::get<1>(fexpr);
-    unsigned floordivPos = std::get<2>(fexpr);
-
-    // Walk each affine expr of the `map` output except `fexpr`, and check if
-    // the LHS and RHS of `fexpr` are used in the LHS and RHS of a `mod`. If
-    // the LHS of `fexpr` is used in any other expr, the map is not a tiled
-    // layout. Examples of non-tiled layouts:
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256,
-    //                               d2 mod 256)>
-    bool found = false;
-    pos = 0;
-    for (AffineExpr expr : map.getResults()) {
-      bool notTiled = false;
-      if (pos != floordivPos) {
-        expr.walk([&](AffineExpr e) {
-          if (e == floordivExprLHS) {
-            if (expr.getKind() == AffineExprKind::Mod) {
-              AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
-              // Check if the LHS and RHS of the `mod` are the same as those of
-              // the floordiv.
-              if (floordivExprLHS == binaryExpr.getLHS() &&
-                  floordivExprRHS == binaryExpr.getRHS()) {
-                // Save the tile size (RHS of `mod`), and the positions of the
-                // `floordiv` and `mod`, if a matching `mod` has not been found
-                // yet.
-                if (!found) {
-                  tileSizePos.emplace_back(
-                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
-                  found = true;
-                } else {
-                  // Non-tiled layout: multiple `mod`s with the same LHS.
-                  // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
-                  //                                  d2 mod 256, d2 mod 256)>
-                  notTiled = true;
-                }
-              } else {
-                // Non-tiled layout: RHS of the `mod` differs from that of the
-                // `floordiv`.
-                // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
-                //                                  d2 mod 128)>
-                notTiled = true;
-              }
-            } else {
-              // Non-tiled layout: the LHS is reused, but not in a `mod`.
-              // e.g. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256,
-              //                                  d2 floordiv 256)>
-              notTiled = true;
-            }
-          }
-        });
-      }
-      if (notTiled) {
-        tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
-        return success();
-      }
-      pos++;
-    }
-  }
-  return success();
-}
-
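To make the tuple encoding concrete, here is what the helper computes for the doc comment's running example (illustrative only; `layoutMap` is assumed to hold the map in the comment below):

```c++
// For layoutMap = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>,
// getTileSizePos records exactly one tuple {32, 1, 2}: the tile-size expr 32,
// the floordiv at result position 1, and the matching mod at result
// position 2. The untiled result d0 at position 0 is left alone.
SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
(void)getTileSizePos(layoutMap, tileSizePos);
assert(tileSizePos.size() == 1 && "expected a single (floordiv, mod) pair");
```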
-/// Check if the `dim` dimension of a memrefType with `layoutMap` becomes
-/// dynamic after normalization. Dimensions that include dynamic dimensions in
-/// the map output will become dynamic dimensions. Return true if `dim` is a
-/// dynamic dimension.
-///
-/// Example:
-/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///
-/// If d1 is a dynamic dimension, the 2nd and 3rd dimensions of the map output
-/// are dynamic:
-/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32>
-static bool
-isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
-                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
-                             MLIRContext *context) {
-  bool isDynamicDim = false;
-  AffineExpr expr = layoutMap.getResults()[dim];
-  // Check if the affine expr of this dimension includes a dynamic dimension of
-  // the input memrefType.
-  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
-    if (e.isa<AffineDimExpr>()) {
-      for (unsigned dm : inMemrefTypeDynDims) {
-        if (e == getAffineDimExpr(dm, context))
-          isDynamicDim = true;
-      }
-    }
-  });
-  return isDynamicDim;
-}
-
-/// Create an affine expr to calculate the dimension size for a tiled-layout
-/// map.
-static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
-                                                  TileExprPattern pat) {
-  // Create the map output for the patterns:
-  //   "floordiv <tile size>" ==> "ceildiv <tile size>"
-  //   "mod <tile size>"      ==> "<tile size>"
-  AffineExpr newMapOutput;
-  AffineBinaryOpExpr binaryExpr = nullptr;
-  switch (pat) {
-  case TileExprPattern::TileMod:
-    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
-    newMapOutput = binaryExpr.getRHS();
-    break;
-  case TileExprPattern::TileFloorDiv:
-    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
-    newMapOutput = getAffineBinaryOpExpr(
-        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
-    break;
-  default:
-    newMapOutput = oldMapOutput;
-  }
-  return newMapOutput;
-}
-
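A worked instance of the rewrite above, assuming an `MLIRContext *ctx` is available (purely illustrative):

```c++
// For the tiled pair (d1 floordiv 32, d1 mod 32), the dimension sizes of the
// normalized memref are (d1 ceildiv 32) tiles, each of extent 32.
AffineExpr d1 = getAffineDimExpr(1, ctx);
AffineExpr numTiles = createDimSizeExprForTiledLayout(
    d1.floorDiv(32), TileExprPattern::TileFloorDiv);
// numTiles == d1 ceildiv 32
AffineExpr tileExtent =
    createDimSizeExprForTiledLayout(d1 % 32, TileExprPattern::TileMod);
// tileExtent == 32
```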
-/// Create new maps to calculate each dimension size of `newMemRefType`, and
-/// create `newDynamicSizes` from them by using AffineApplyOp.
-///
-/// Steps for normalizing dynamic memrefs for a tiled layout map.
-/// Example:
-///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///    %0 = dim %arg0, %c1 : memref<4x?xf32>
-///    %1 = alloc(%0) : memref<4x?xf32, #map0>
-///
-/// (Before this function)
-/// 1. Check if `map` (#map0) is a tiled layout using `getTileSizePos()`. Only
-///    a single layout map is supported.
-///
-/// 2. Create a normalized memrefType using `isNormalizedMemRefDynamicDim()`.
-///    It is memref<4x?x?xf32> in the above example.
-///
-/// (In this function)
-/// 3. Create new maps to calculate each dimension of the normalized
-///    memrefType using `createDimSizeExprForTiledLayout()`. In a tiled
-///    layout, the dimension size can be calculated by replacing
-///    "floordiv <tile size>" with "ceildiv <tile size>" and
-///    "mod <tile size>" with "<tile size>".
-///    - New maps in the above example:
-///      #map0 = affine_map<(d0, d1) -> (d0)>
-///      #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
-///      #map2 = affine_map<(d0, d1) -> (32)>
-///
-/// 4. Create AffineApplyOps to apply the new maps. The outputs of the
-///    AffineApplyOps are used in the dynamicSizes of the new AllocOp.
-///    %0 = dim %arg0, %c1 : memref<4x?xf32>
-///    %c4 = arith.constant 4 : index
-///    %1 = affine.apply #map1(%c4, %0)
-///    %2 = affine.apply #map2(%c4, %0)
-static void createNewDynamicSizes(MemRefType oldMemRefType,
-                                  MemRefType newMemRefType, AffineMap map,
-                                  memref::AllocOp *allocOp, OpBuilder b,
-                                  SmallVectorImpl<Value> &newDynamicSizes) {
-  // Create new inputs for the AffineApplyOps.
-  SmallVector<Value, 4> inAffineApply;
-  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
-  unsigned dynIdx = 0;
-  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
-    if (oldMemRefShape[d] < 0) {
-      // Use the dynamicSizes of the allocOp for the dynamic dimension.
-      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
-      dynIdx++;
-    } else {
-      // Create a ConstantOp for the static dimension.
-      Attribute constantAttr =
-          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
-      inAffineApply.emplace_back(
-          b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
-    }
-  }
-
-  // Create a new map to calculate each dimension size of the new memref for
-  // each original map output, only for the dynamic dimensions of
-  // `newMemRefType`.
-  unsigned newDimIdx = 0;
-  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(map, tileSizePos);
-  for (AffineExpr expr : map.getResults()) {
-    if (newMemRefShape[newDimIdx] < 0) {
-      // Create new maps to calculate each dimension size of the new memref.
-      enum TileExprPattern pat = TileExprPattern::TileNone;
-      for (auto pos : tileSizePos) {
-        if (newDimIdx == std::get<1>(pos))
-          pat = TileExprPattern::TileFloorDiv;
-        else if (newDimIdx == std::get<2>(pos))
-          pat = TileExprPattern::TileMod;
-      }
-      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
-      AffineMap newMap =
-          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
-      Value affineApp =
-          b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
-      newDynamicSizes.emplace_back(affineApp);
-    }
-    newDimIdx++;
-  }
-}
-
-// TODO: Currently works for static memrefs with a single layout map.
-LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
-  MemRefType memrefType = allocOp->getType();
-  OpBuilder b(*allocOp);
-
-  // Fetch a new memref type after normalizing the old memref to have an
-  // identity map layout.
-  MemRefType newMemRefType =
-      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
-  if (newMemRefType == memrefType)
-    // Either memrefType already had an identity map or the map couldn't be
-    // transformed to an identity map.
-    return failure();
-
-  Value oldMemRef = allocOp->getResult();
-
-  SmallVector<Value, 4> symbolOperands(allocOp->symbolOperands());
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-  memref::AllocOp newAlloc;
-  // Check if `layoutMap` is a tiled layout. Only a single layout map is
-  // supported for normalizing dynamic memrefs.
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
-    MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>();
-    SmallVector<Value, 4> newDynamicSizes;
-    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
-                          newDynamicSizes);
-    // Add the new dynamic sizes in the new AllocOp.
-    newAlloc =
-        b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
-                                  newDynamicSizes, allocOp->alignmentAttr());
-  } else {
-    newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
-                                         allocOp->alignmentAttr());
-  }
-  // Replace all uses of the old memref.
-  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
-                                      /*extraIndices=*/{},
-                                      /*indexRemap=*/layoutMap,
-                                      /*extraOperands=*/{},
-                                      /*symbolOperands=*/symbolOperands,
-                                      /*domOpFilter=*/nullptr,
-                                      /*postDomOpFilter=*/nullptr,
-                                      /*allowNonDereferencingOps=*/true))) {
-    // If it failed (due to escapes for example), bail out.
-    newAlloc.erase();
-    return failure();
-  }
-  // Replace any uses of the original alloc op and erase it. All remaining uses
-  // have to be dealloc's; RAMUW above would've failed otherwise.
-  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
-    return isa<memref::DeallocOp>(op);
-  }));
-  oldMemRef.replaceAllUsesWith(newAlloc);
-  allocOp->erase();
-  return success();
-}
-
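The net effect of `normalizeMemRef`, shown on a small static example (an illustration, not taken from this diff; `allocOp` is assumed to be a `memref::AllocOp` in scope):

```c++
// Given:  %0 = memref.alloc() : memref<64xf32,
//                 affine_map<(d0) -> (d0 floordiv 4, d0 mod 4)>>
// After normalizeMemRef(&allocOp) succeeds, the alloc and all of its
// dereferencing users are rewritten against the identity layout:
//         %0 = memref.alloc() : memref<16x4xf32>
if (failed(normalizeMemRef(&allocOp)))
  return; // Layout was already identity, or a use blocked the replacement.
```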
-MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
-                                     unsigned numSymbolicOperands) {
-  unsigned rank = memrefType.getRank();
-  if (rank == 0)
-    return memrefType;
-
-  if (memrefType.getLayout().isIdentity()) {
-    // Either no map is associated with this memref or this memref has
-    // a trivial (identity) map.
-    return memrefType;
-  }
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-
-  // We don't do any checks for one-to-one'ness; we assume that it is
-  // one-to-one.
-
-  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
-  // for now.
-  // TODO: Normalize the other types of dynamic memrefs.
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
-    return memrefType;
-
-  // We have a single map that is not an identity map. Create a new memref
-  // with the right shape and an identity layout map.
-  ArrayRef<int64_t> shape = memrefType.getShape();
-  // FlatAffineConstraints may later on use symbolicOperands.
-  FlatAffineConstraints fac(rank, numSymbolicOperands);
-  SmallVector<unsigned, 4> memrefTypeDynDims;
-  for (unsigned d = 0; d < rank; ++d) {
-    // Use the constraint system only for static dimensions.
-    if (shape[d] > 0) {
-      fac.addBound(FlatAffineConstraints::LB, d, 0);
-      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
-    } else {
-      memrefTypeDynDims.emplace_back(d);
-    }
-  }
-  // We compose this map with the original index (logical) space to derive
-  // the upper bounds for the new index space.
-  unsigned newRank = layoutMap.getNumResults();
-  if (failed(fac.composeMatchingMap(layoutMap)))
-    return memrefType;
-  // TODO: Handle semi-affine maps.
-  // Project out the old data dimensions.
-  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
-  SmallVector<int64_t, 4> newShape(newRank);
-  for (unsigned d = 0; d < newRank; ++d) {
-    // Check whether each dimension of the normalized memrefType is dynamic.
-    bool isDynDim = isNormalizedMemRefDynamicDim(
-        d, layoutMap, memrefTypeDynDims, b.getContext());
-    if (isDynDim) {
-      newShape[d] = -1;
-    } else {
-      // The lower bound for the shape is always zero.
-      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
-      // For a static memref and an affine map with no symbols, this is
-      // always bounded.
-      assert(ubConst.hasValue() && "should always have an upper bound");
-      if (ubConst.getValue() < 0)
-        // This is due to an invalid map that maps to a negative space.
-        return memrefType;
-      // If this dimension of the new memrefType were dynamic, its value
-      // would be -1.
-      newShape[d] = ubConst.getValue() + 1;
-    }
-  }
-
-  // Create the new memref type after trivializing the old layout map.
-  MemRefType newMemRefType =
-      MemRefType::Builder(memrefType)
-          .setShape(newShape)
-          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
-
-  return newMemRefType;
-}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,7 +3,11 @@
   TestAffineDataCopy.cpp
   TestAffineLoopUnswitching.cpp
   TestAffineLoopParametricTiling.cpp
+  TestLoopFusion.cpp
+  TestLoopMapping.cpp
+  TestLoopParametricTiling.cpp
   TestLoopPermutation.cpp
+  TestLoopUnrolling.cpp
   TestVectorizationUtils.cpp

   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -13,10 +13,10 @@

 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"

 #define PASS_NAME "test-affine-data-copy"
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
--- a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
-#include "mlir/Transforms/LoopUtils.h"

 using namespace mlir;
diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
rename from mlir/test/lib/Transforms/TestLoopFusion.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
--- a/mlir/test/lib/Transforms/TestLoopFusion.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
@@ -12,11 +12,10 @@

 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"

 #define DEBUG_TYPE "test-loop-fusion"
diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
rename from mlir/test/lib/Transforms/TestLoopMapping.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
@@ -12,11 +12,10 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"

 #include "llvm/ADT/SetVector.h"
diff --git a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp b/mlir/test/lib/Dialect/Affine/TestLoopParametricTiling.cpp
rename from mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopParametricTiling.cpp
--- a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopParametricTiling.cpp
@@ -10,11 +10,10 @@
 //
 //===----------------------------------------------------------------------===//

+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"

 using namespace mlir;
diff --git a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
--- a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
@@ -12,9 +12,8 @@

 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"

 #define PASS_NAME "test-loop-permutation"
diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Dialect/Affine/TestLoopUnrolling.cpp
rename from mlir/test/lib/Transforms/TestLoopUnrolling.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopUnrolling.cpp
--- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopUnrolling.cpp
@@ -10,13 +10,12 @@
 //
 //===----------------------------------------------------------------------===//

+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"

 using namespace mlir;
diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
--- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -21,7 +22,6 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -2,10 +2,6 @@
 add_mlir_library(MLIRTestTransforms
   TestConstantFold.cpp
   TestInlining.cpp
-  TestLoopFusion.cpp
-  TestLoopMapping.cpp
-  TestLoopParametricTiling.cpp
-  TestLoopUnrolling.cpp

   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Transforms/TestConstantFold.cpp b/mlir/test/lib/Transforms/TestConstantFold.cpp
--- a/mlir/test/lib/Transforms/TestConstantFold.cpp
+++ b/mlir/test/lib/Transforms/TestConstantFold.cpp
@@ -9,7 +9,6 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/FoldUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"

 using namespace mlir;
diff --git a/mlir/unittests/Transforms/CMakeLists.txt b/mlir/unittests/Transforms/CMakeLists.txt
--- a/mlir/unittests/Transforms/CMakeLists.txt
+++ b/mlir/unittests/Transforms/CMakeLists.txt
@@ -4,4 +4,5 @@
 )
 target_link_libraries(MLIRTransformsTests
   PRIVATE
+  MLIRParser
   MLIRTransforms)