diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -238,7 +238,8 @@ def MemRef_CastOp : MemRef_Op<"cast", [ NoSideEffect, SameOperandsAndResultShape, DeclareOpInterfaceMethods, - ViewLikeOpInterface + ViewLikeOpInterface, + MemRefsNormalizable ]> { let summary = "memref cast operation"; let description = [{ @@ -387,7 +388,7 @@ // DimOp //===----------------------------------------------------------------------===// -def DimOp : MemRef_Op<"dim", [NoSideEffect]> { +def DimOp : MemRef_Op<"dim", [NoSideEffect, MemRefsNormalizable]> { let summary = "dimension index operation"; let description = [{ The `dim` operation takes a memref and a dimension operand of type `index`. diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -37,6 +37,10 @@ void updateFunctionSignature(FuncOp funcOp, ModuleOp moduleOp); void setCalleesAndCallersNonNormalizable(FuncOp funcOp, ModuleOp moduleOp, DenseSet &normalizableFuncs); + // The affine.apply op is needed to normalize dynamic memrefs. 
+ void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } Operation *createOpResultsNormalized(FuncOp funcOp, Operation *oldOp); }; diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "mlir/Transforms/Utils.h" - #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/Utils.h" @@ -24,6 +23,8 @@ #include "mlir/Support/MathExtras.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TypeSwitch.h" +#include + using namespace mlir; // Perform the replacement in `op`. @@ -380,6 +381,249 @@ } } +/// Enum to set patterns of affine expr in tiled-layout map. +/// TileFloorDiv: <dim expr> div <tile size> +/// TileMod: <dim expr> mod <tile size> +/// TileNone: None of the above +/// Example: +/// #tiled_2d_128x256 = affine_map<(d0, d1) +/// -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)> +/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv +/// "d0 mod 128" and "d1 mod 256" ==> TileMod +enum TileExprPattern { TileFloorDiv, TileMod, TileNone }; + +/// Check if `map` is a tiled layout. This returns a vector of tuples including +/// AffineExpr for tile size, position of `floordiv`, and that of `mod`. If it is +/// not a tiled layout, the vector is empty. +static SmallVector> +getTileSizePos(AffineMap map) { + SmallVector, 2> tsPos; + SmallVector, 2> tsPosEmpty; + + // Create `floordivExprs` which is a vector of tuples including LHS and RHS of + // `floordiv` and its position in `map` output. Example: #tiled_2d_128x256 = + // affine_map<(d0, d1) + // -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)> + // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}. 
+ SmallVector, 4> floordivExprs; + unsigned pos = 0; + for (AffineExpr expr : map.getResults()) { + if (expr.getKind() == AffineExprKind::FloorDiv) { + AffineBinaryOpExpr binaryExpr = expr.cast(); + if (binaryExpr.getRHS().isa()) + floordivExprs.emplace_back( + std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos)); + } + pos++; + } + // Not tiled layout if `floordivExprs` is empty. + if (floordivExprs.empty()) + return tsPos; // empty + + // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is + // not tiled layout. + for (std::tuple fexpr : floordivExprs) { + AffineExpr floordivExprLHS = std::get<0>(fexpr); + AffineExpr floordivExprRHS = std::get<1>(fexpr); + unsigned floordivPos = std::get<2>(fexpr); + + // Walk affinexpr of `map` output except `fexpr`, and if LHS of `fexpr` is + // found, check if it is used in LHS of `mod`. If not, the map is not tiled + // layout. If it is used in `mod`, check if RHS is the same with `fexpr`. + // Example of non tiled layout: + // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)> + // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)> + // affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod + // 256)> + bool found = false; + pos = 0; + for (AffineExpr expr : map.getResults()) { + bool notTiled = false; + if (pos != floordivPos) { + expr.walk([&](AffineExpr e) { + if (e == floordivExprLHS) { + if (expr.getKind() == AffineExprKind::Mod) { + AffineBinaryOpExpr binaryExpr = expr.cast(); + // If LHS and RHS of `mod` are the same with those of floordiv. + if (floordivExprLHS == binaryExpr.getLHS() && + floordivExprRHS == binaryExpr.getRHS()) { + // Save tile size (RHS of `mod`), and position of `floordiv` and + // `mod` if same expr with `mod` is not found yet. + if (!found) { + tsPos.emplace_back( + std::make_tuple(binaryExpr.getRHS(), floordivPos, pos)); + found = true; + } else { + // Have multiple `mod` with the same LHS. + // eg. 
affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 + // mod 256, d2 mod 256)> + notTiled = true; + } + } else { + // RHS of `mod` is different from `floordiv`. + // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 + // mod 128)> + notTiled = true; + } + } else { + // LHS is the same, but not `mod`. + // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 + // floordiv 256)> + notTiled = true; + } + } + }); + } + if (notTiled) + return tsPosEmpty; + pos++; + } + } + + return tsPos; +} + +/// Check if `dim` dimension of memrefType with `layoutMap` after normalization +/// is dynamic. Dimensions that include dynamic dimensions in the map output +/// will be dynamic dimensions. +/// +/// Example: +/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)> +/// +/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic. +/// memref<4x?xf32, #map0> ==> memref<4x?x?xf32> +static bool +isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap, + SmallVectorImpl &inMemrefTypeDynDims, + MLIRContext *context) { + bool newDynDim = false; + AffineExpr expr = layoutMap.getResults()[dim]; + // Check if affine expr of the dimension includes dynamic dimension of input + // memrefType. + expr.walk([&inMemrefTypeDynDims, &newDynDim, &context](AffineExpr e) { + if (e.isa()) { + for (unsigned dm : inMemrefTypeDynDims) { + if (e == getAffineDimExpr(dm, context)) + newDynDim = true; + } + } + }); + return newDynDim; +} + +/// Create affine expr to calculate dimension size for a tiled-layout map. +static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput, + TileExprPattern pat) { + // Create map output for the patterns. 
+ // "floordiv <tile size>" ==> "ceildiv <tile size>" + // "mod <tile size>" ==> "<tile size>" + AffineExpr newMapOutput; + AffineBinaryOpExpr binaryExpr = nullptr; + switch (pat) { + case TileExprPattern::TileMod: + binaryExpr = oldMapOutput.cast(); + newMapOutput = binaryExpr.getRHS(); + break; + case TileExprPattern::TileFloorDiv: + binaryExpr = oldMapOutput.cast(); + newMapOutput = getAffineBinaryOpExpr( + AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS()); + break; + default: + newMapOutput = oldMapOutput; + } + return newMapOutput; +} + +/// Create new maps to calculate each dimension size of `newMemRefType`, and +/// create `newDynamicSizes` from them by using AffineApplyOp. +/// +/// Steps for normalizing dynamic memrefs for a tiled layout map +/// Example: +/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)> +/// %0 = dim %arg0, %c1 :memref<4x?xf32> +/// %1 = alloc(%0) : memref<4x?xf32, #map0> +/// +/// (Before this function) +/// 1. Check if `map`(#map0) is a tiled layout. Only single tiled layout is +/// supported. (`getTileSizePos()`) +/// +/// 2. Create normalized memrefType(`newMemRefType`). +/// (`isNormalizedMemRefDynamicDim()`) +/// +/// (In this function) +/// 3. Create new maps to calculate each dimension size of `newMemRefType`. +/// Inputs of the new map are the same with `map`(d0, d1), and output of the new +/// map is for each dimension. In the tiled layout, the dimension size can be +/// calculated by replacing "floordiv <tile size>" with "ceildiv <tile size>" +/// and "mod <tile size>" with "<tile size>". +/// (`createDimSizeExprForTiledLayout()`) +/// - New map +/// #map0 = affine_map<(d0, d1) -> (d0)> +/// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)> +/// #map2 = affine_map<(d0, d1) -> (32)> +/// +/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp +/// is dynamicSizes for new AllocOp(`newDynamicSizes`). 
The inputs for +/// AffineApplyOp are created from dynamicSizes of `allocOp` for dynamic +/// dimension(%0) and created from ConstantOp for static dimension(%c4). +/// %0 = dim %arg0, %c1 : memref<4x?xf32> +/// %c4 = constant 4 : index +/// %1 = affine.apply #map1(%c4, %0) +/// %2 = affine.apply #map2(%c4, %0) +/// +/// (After this function) +/// 5. `newDynamicSizes`(%1 and %2) will be used in new AllocOp. +/// (`normalizeMemRef()`) +static void createNewDynamicSizes(MemRefType oldMemRefType, + MemRefType newMemRefType, AffineMap map, + memref::AllocOp *allocOp, OpBuilder b, + SmallVectorImpl &newDynamicSizes) { + // Create new input for AffineApplyOp. + SmallVector inAffineApply; + ArrayRef oldMemRefShape = oldMemRefType.getShape(); + unsigned dynIdx = 0; + for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) { + if (oldMemRefShape[d] < 0) { + // Use dynamicSizes of allocOp for dynamic dimension. + inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]); + dynIdx++; + } else { + // Create ConstantOp for static dimension. + Attribute constantAttr = + b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]); + inAffineApply.emplace_back( + b.create(allocOp->getLoc(), constantAttr)); + } + } + + // Create new map to calculate each dimension size of new memref for each + // original map output. Only for dynamic dimension of `newMemRefType`. + unsigned newDimIdx = 0; + ArrayRef newMemRefShape = newMemRefType.getShape(); + SmallVector> tileSizePos = + getTileSizePos(map); + for (AffineExpr expr : map.getResults()) { + if (newMemRefShape[newDimIdx] < 0) { + // Create new maps to calculate each dimension size of new memref. 
+ enum TileExprPattern pat = TileExprPattern::TileNone; + for (auto pos : tileSizePos) { + if (newDimIdx == std::get<1>(pos)) + pat = TileExprPattern::TileFloorDiv; + else if (newDimIdx == std::get<2>(pos)) + pat = TileExprPattern::TileMod; + } + AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat); + AffineMap newMap = + AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput); + Value affineApp = + b.create(allocOp->getLoc(), newMap, inAffineApply); + newDynamicSizes.emplace_back(affineApp); + } + newDimIdx++; + } +} + // TODO: Currently works for static memrefs with a single layout map. LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) { MemRefType memrefType = allocOp->getType(); @@ -397,9 +641,25 @@ Value oldMemRef = allocOp->getResult(); SmallVector symbolOperands(allocOp->symbolOperands()); - memref::AllocOp newAlloc = b.create( - allocOp->getLoc(), newMemRefType, allocOp->alignmentAttr()); AffineMap layoutMap = memrefType.getAffineMaps().front(); + memref::AllocOp newAlloc; + // Check if `layoutMap` is a tiled layout. Only single tiled layout is + // supported for normalizing dynamic memrefs. + SmallVector> tileSizePos = + getTileSizePos(layoutMap); + if (newMemRefType.getNumDynamicDims() > 0 && tileSizePos.size() != 0) { + MemRefType oldMemRefType = oldMemRef.getType().cast(); + SmallVector newDynamicSizes; + createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b, + newDynamicSizes); + // Add the new dynamic sizes in new AllocOp. + newAlloc = + b.create(allocOp->getLoc(), newMemRefType, + newDynamicSizes, allocOp->alignmentAttr()); + } else { + newAlloc = b.create(allocOp->getLoc(), newMemRefType, + allocOp->alignmentAttr()); + } // Replace all uses of the old memref. if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc, /*extraIndices=*/{}, @@ -440,8 +700,12 @@ // We don't do any checks for one-to-one'ness; we assume that it is // one-to-one. 
- // TODO: Only for static memref's for now. - if (memrefType.getNumDynamicDims() > 0) + // Normalize only static memrefs and dynamic memrefs with a tiled-layout map + // for now. + // TODO: Normalize the other types of dynamic memrefs. + SmallVector> tileSizePos = + getTileSizePos(layoutMaps.front()); + if (memrefType.getNumDynamicDims() > 0 && tileSizePos.size() == 0) return memrefType; // We have a single map that is not an identity map. Create a new memref @@ -449,9 +713,12 @@ ArrayRef shape = memrefType.getShape(); // FlatAffineConstraint may later on use symbolicOperands. FlatAffineConstraints fac(rank, numSymbolicOperands); + SmallVector memrefTypeDynDims; for (unsigned d = 0; d < rank; ++d) { fac.addConstantLowerBound(d, 0); fac.addConstantUpperBound(d, shape[d] - 1); + if (shape[d] < 0) + memrefTypeDynDims.emplace_back(d); } // We compose this map with the original index (logical) space to derive // the upper bounds for the new index space. @@ -464,15 +731,22 @@ fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds()); SmallVector newShape(newRank); for (unsigned d = 0; d < newRank; ++d) { + // Check if each dimension of normalized memrefType is dynamic. + bool newDynDim = isNormalizedMemRefDynamicDim( + d, layoutMap, memrefTypeDynDims, b.getContext()); // The lower bound for the shape is always zero. auto ubConst = fac.getConstantUpperBound(d); // For a static memref and an affine map with no symbols, this is // always bounded. assert(ubConst.hasValue() && "should always have an upper bound"); - if (ubConst.getValue() < 0) + if (ubConst.getValue() < 0 && memrefType.getNumDynamicDims() == 0) // This is due to an invalid map that maps to a negative space. return memrefType; - newShape[d] = ubConst.getValue() + 1; + // If dimension of new memrefType is dynamic, the value is -1. + if (newDynDim) + newShape[d] = -1; + else + newShape[d] = ubConst.getValue() + 1; } // Create the new memref type after trivializing the old layout map. 
diff --git a/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir b/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir @@ -0,0 +1,251 @@ +// RUN: mlir-opt -normalize-memrefs %s -split-input-file| FileCheck %s + +// For all these cases, we test if MemRefs Normalization works with the test +// operations. These are test cases for MemRefs with dynamic dimension +// and tiled-layout map. +// * test.op_norm: this operation has the MemRefsNormalizable attribute. The tests +// that include this operation are constructed so that the normalization should +// happen. + +#map_tiled = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 floordiv 32, d3 floordiv 64, d2 mod 32, d3 mod 64)> + +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d2 ceildiv 32)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (32)> + +// Test with op_norm and maps in arguments and in the operations in the function. +// Memref has two dynamic dimensions. 
+ +// CHECK-LABEL: test_norm_dynamic12 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x1x?x64xf32>) { +func @test_norm_dynamic12(%arg0 : memref<1x?x?x14xf32, #map_tiled>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_tiled> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_tiled> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_tiled> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_tiled>, memref<1x?x?x14xf32, #map_tiled>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_tiled> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x1x?x64xf32> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x1x?x64xf32> + // CHECK-DAG: [[CST_1_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_14_:%.+]] = constant 14 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[VAR_2_:%.+]] = affine.apply #[[$MAP0]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK-DAG: [[VAR_3_:%.+]] = affine.apply #[[$MAP1]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK-DAG: [[VAR_4_:%.+]] = affine.apply #[[$MAP2]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK: [[RES_:%.+]] = memref.alloc([[VAR_2_]], [[VAR_3_]], [[VAR_4_]]) : memref<1x?x?x1x?x64xf32> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x1x?x64xf32>, memref<1x?x?x1x?x64xf32>) -> () + // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x1x?x64xf32> + // CHECK: return +} + +// ----- + +// Test with op_norm and maps in arguments and in the operations in the function. +// All dimensions are dynamic. 
+ +#map_tiled1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, (d2 floordiv 4) floordiv 32, (d3 mod 8) floordiv 64, (d2 floordiv 4) mod 32, (d3 mod 8) mod 64)> + +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> ((d2 floordiv 4) ceildiv 32)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (32)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0)> +// CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> ((d3 mod 8) ceildiv 64)> +// CHECK-DAG: #[[$MAP5:.+]] = affine_map<(d0, d1, d2, d3) -> (64)> + +// CHECK-LABEL: test_norm_dynamic1234 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref) { +func @test_norm_dynamic1234(%arg0 : memref) -> () { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c3 = constant 3 : index + %0 = memref.dim %arg0, %c0 :memref + %1 = memref.dim %arg0, %c1 :memref + %2 = memref.dim %arg0, %c2 :memref + %3 = memref.dim %arg0, %c3 :memref + %4 = memref.alloc(%0, %1, %2, %3) : memref + "test.op_norm"(%arg0, %4) : (memref, memref) -> () + memref.dealloc %4 : memref + return + // CHECK-DAG: [[CST_0_:%.+]] = constant 0 : index + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-DAG: [[CST_3_:%.+]] = constant 3 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_0_]] : memref + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref + // CHECK-DAG: [[DIM_2_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref + // CHECK-DAG: [[DIM_3_:%.+]] = memref.dim [[ARG_0_]], [[CST_3_]] : memref + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[VAR_4_:%.+]] = affine.apply #[[$MAP3]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK-DAG: [[VAR_5_:%.+]] = affine.apply #[[$MAP0]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK-DAG: [[VAR_6_:%.+]] = affine.apply 
#[[$MAP1]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK-DAG: [[VAR_7_:%.+]] = affine.apply #[[$MAP4]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK-DAG: [[VAR_8_:%.+]] = affine.apply #[[$MAP2]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK-DAG: [[VAR_9_:%.+]] = affine.apply #[[$MAP5]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]]) + // CHECK: [[RES_:%.+]] = memref.alloc([[VAR_4_]], [[VAR_5_]], [[VAR_6_]], [[VAR_7_]], [[VAR_8_]], [[VAR_9_]]) : memref + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref, memref) -> () + // CHECK: memref.dealloc [[RES_]] : memref + // CHECK: return +} + +// ----- + +// Same test with maps that are not tiled layout maps in the arguments and the operations in the function. +// This is not normalized since this is not a tiled-layout map. No mod and floordiv. + +#map_not_tiled0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2)> + +// CHECK-DAG: #[[$MAP6:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2)> + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled0 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP6]]>) { +func @test_norm_dynamic_not_tiled0(%arg0 : memref<1x?x?x14xf32, #map_not_tiled0>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled0> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled0> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled0> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled0>, memref<1x?x?x14xf32, #map_not_tiled0>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled0> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : 
memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: [[RES_:%.+]] = memref.alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP6]]>, memref<1x?x?x14xf32, #[[$MAP6]]>) -> () + // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: return +} + +// ----- + +// Same test with maps that are not tiled layout maps in the arguments and the operations in the function. +// This is not normalized since this is not a tiled-layout map. No floordiv. + +#map_not_tiled1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2, d2 mod 32, d3 mod 64)> + +// CHECK-DAG: #[[$MAP6:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2, d2 mod 32, d3 mod 64)> + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled1 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP6]]>) { +func @test_norm_dynamic_not_tiled1(%arg0 : memref<1x?x?x14xf32, #map_not_tiled1>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled1> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled1> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled1> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled1>, memref<1x?x?x14xf32, #map_not_tiled1>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled1> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: [[RES_:%.+]] = memref.alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP6]]>, memref<1x?x?x14xf32, #[[$MAP6]]>) -> () 
+ // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: return +} + +// ----- + +// Same test with maps that are not tiled layout maps in the arguments and the operations in the function. +// This is not normalized since this is not a tiled-layout map. RHS of floordiv is different from that of mod. + +#map_not_tiled2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 floordiv 64, d2 mod 32, d3 mod 32)> + +// CHECK-DAG: #[[$MAP7:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 floordiv 64, d2 mod 32, d3 mod 32)> + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled2 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP7]]>) { +func @test_norm_dynamic_not_tiled2(%arg0 : memref<1x?x?x14xf32, #map_not_tiled2>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled2> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled2> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled2> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled2>, memref<1x?x?x14xf32, #map_not_tiled2>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled2> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP7]]> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x14xf32, #[[$MAP7]]> + // CHECK: [[RES_:%.+]] = memref.alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP7]]> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP7]]>, memref<1x?x?x14xf32, #[[$MAP7]]>) -> () + // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP7]]> + // CHECK: return +} + +// ----- + +// Same test with maps that are not tiled layout maps in the arguments and the operations in the 
function. +// This is not normalized since this is not a tiled-layout map. Multiple mod with the same LHS and RHS. + +#map_not_tiled3 = affine_map<(d0, d1, d2, d3) -> (d0, d1 floordiv 32, d2, d3, d1 mod 32, d1 mod 32)> + +// CHECK-DAG: #[[$MAP8:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1 floordiv 32, d2, d3, d1 mod 32, d1 mod 32)> + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled3 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP8]]>) { +func @test_norm_dynamic_not_tiled3(%arg0 : memref<1x?x?x14xf32, #map_not_tiled3>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled3> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled3> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled3> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled3>, memref<1x?x?x14xf32, #map_not_tiled3>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled3> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP8]]> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x14xf32, #[[$MAP8]]> + // CHECK: [[RES_:%.+]] = memref.alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP8]]> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP8]]>, memref<1x?x?x14xf32, #[[$MAP8]]>) -> () + // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP8]]> + // CHECK: return +} + +// ----- + +// Same test with maps that are not tiled layout maps in the arguments and the operations in the function. +// This is not normalized since this is not a tiled-layout map. floordiv and mod with the same LHS and RHS (d0 floordiv 32 and d0 mod 32), but an unrelated d0 exists in another position. 
+ +#map_not_tiled4 = affine_map<(d0, d1, d2, d3) -> (d0 floordiv 32, d1 floordiv 32, d0, d3, d0 mod 32, d1 mod 32)> + +// CHECK-DAG: #[[$MAP9:.+]] = affine_map<(d0, d1, d2, d3) -> (d0 floordiv 32, d1 floordiv 32, d0, d3, d0 mod 32, d1 mod 32)> + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled4 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP9]]>) { +func @test_norm_dynamic_not_tiled4(%arg0 : memref<1x?x?x14xf32, #map_not_tiled4>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = memref.dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled4> + %1 = memref.dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled4> + %2 = memref.alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled4> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled4>, memref<1x?x?x14xf32, #map_not_tiled4>) -> () + memref.dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled4> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = memref.dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP9]]> + // CHECK-DAG: [[DIM_1_:%.+]] = memref.dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x14xf32, #[[$MAP9]]> + // CHECK: [[RES_:%.+]] = memref.alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP9]]> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP9]]>, memref<1x?x?x14xf32, #[[$MAP9]]>) -> () + // CHECK: memref.dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP9]]> + // CHECK: return +}