diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1371,7 +1371,7 @@ // DimOp //===----------------------------------------------------------------------===// -def DimOp : Std_Op<"dim", [NoSideEffect]> { +def DimOp : Std_Op<"dim", [NoSideEffect, MemRefsNormalizable]> { let summary = "dimension index operation"; let description = [{ The `dim` operation takes a memref/tensor and a dimension operand of type diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -36,6 +36,10 @@ void updateFunctionSignature(FuncOp funcOp, ModuleOp moduleOp); void setCalleesAndCallersNonNormalizable(FuncOp funcOp, ModuleOp moduleOp, DenseSet<FuncOp> &normalizableFuncs); + // Use AffineDialect(AffineApply op) for normalizing dynamic memrefs. + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert<AffineDialect>(); + } Operation *createOpResultsNormalized(FuncOp funcOp, Operation *oldOp); }; diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -379,6 +379,184 @@ } } +// Steps for normalizing dynamic memrefs for tiled layout map +// Example: +// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)> +// %0 = dim %arg0, %c1 :memref<4x?xf32> +// %1 = alloc(%0) : memref<4x?xf32, #map0> +// +// 1. Check if original map(#map0) is tiled layout. Only tiled layout is +// supported. (`isTiledLayoutMap()`) +// +// 2. Create normalized memrefType. Dimensions that include dynamic dimensions +// in the map output will be dynamic dimensions. +// (`isNormalizedMemRefDynamicDim()`) +// memref<4x?xf32, #map0> ==> memref<4x?x?xf32> +// +// 3. 
Create new maps to calculate each dimension size of new memref. Inputs of +// the new map is the same with original map(d0, d1), and output of the new map +// is for each dimension. In tiled layout, the dimension size can be calculated +// by replacing "floordiv <tile size>" with "ceildiv <tile size>" and "mod +// <tile size>" with "<tile size>". (`getNewMapOutputTiledLayout()` and +// `createNewDynamicSizes()`) +// - New map +// #map0 = affine_map<(d0, d1) -> (d0)> +// #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)> +// #map2 = affine_map<(d0, d1) -> (32)> +// +// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp is +// dynamicSizes for new AllocOp. The inputs for AffineApplyOp are created by +// dynamicSizes of AllocOp for dynamic dimension(%0) and created by ConstantOp +// for static dimension(%c4) (`createNewDynamicSizes()`) +// %0 = dim %arg0, %c1 : memref<4x?xf32> +// %c4 = constant 4 : index +// %1 = affine.apply #map1(%c4, %0) +// %2 = affine.apply #map2(%c4, %0) +// +// 5. Add the new dynamic sizes(%1 and %2) in new AllocOp (`normalizeMemRef()`) +// %3 = alloc(%1, %2) : memref<4x?x?xf32> +
+enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
+
+// Check if `map` is tiled-layout map.
+static bool isTiledLayoutMap(AffineMap map) {
+  // Find `floordiv` in `map` and get tile size.
+  // Example
+  //  #tiled_2d_128x256 = affine_map<(d0, d1)
+  //     -> (d0 floordiv 128, d1 floordiv 256, d0 mod 128, d1 mod 256)>
+  // In this example, `tileSizes` includes {d0, 128} and {d1, 256}.
+  SmallVector<std::pair<AffineExpr, AffineExpr>, 4> tileSizes;
+  for (AffineExpr expr : map.getResults()) {
+    expr.walk([&](AffineExpr e) {
+      if (e.getKind() == AffineExprKind::FloorDiv) {
+        AffineBinaryOpExpr binaryExpr = e.cast<AffineBinaryOpExpr>();
+        if (binaryExpr.getRHS().getKind() == AffineExprKind::Constant)
+          tileSizes.emplace_back(binaryExpr.getLHS(), binaryExpr.getRHS());
+      }
+    });
+  }
+  if (tileSizes.size() == 0)
+    return false;
+  else {
+    // Find `mod` in `map` and check if it matches tiled layout pattern. 
+    unsigned tileCnt = 0;
+    for (AffineExpr expr : map.getResults()) {
+      expr.walk([&](AffineExpr e) {
+        if (e.getKind() == AffineExprKind::Mod) {
+          AffineBinaryOpExpr binaryExpr = e.cast<AffineBinaryOpExpr>();
+          for (std::pair<AffineExpr, AffineExpr> ts : tileSizes) {
+            if (binaryExpr.getLHS() == ts.first &&
+                binaryExpr.getRHS() == ts.second)
+              tileCnt++;
+          }
+        }
+      });
+    }
+    if (tileCnt == tileSizes.size())
+      return true;
+    else
+      return false;
+  }
+}
+
+// Check if `dim` dimension of memrefType with `layoutMap` after normalization
+// is dynamic. Dimensions that include dynamic dimensions in the map output
+// will be dynamic dimensions.
+// Example: affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
+static bool
+isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
+                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
+                             OpBuilder b) {
+  bool newDynDim = false;
+  AffineExpr expr = layoutMap.getResults()[dim];
+  // Check if affine expr of the dimension includes dynamic dimension of input
+  // memrefType.
+  expr.walk([&inMemrefTypeDynDims, &newDynDim, &b](AffineExpr e) {
+    if (e.getKind() == AffineExprKind::DimId) {
+      for (unsigned dm : inMemrefTypeDynDims) {
+        if (e == getAffineDimExpr(dm, b.getContext()))
+          newDynDim = true;
+      }
+    }
+  });
+  return newDynDim;
+}
+
+// Create map output to calculate each dimension size of new memref for tiled
+// layout.
+static AffineExpr getNewMapOutputTiledLayout(AffineExpr oldMapOutput) {
+  enum TileExprPattern pat = TileExprPattern::TileNone;
+  // Find "mod <tile size>" and "floordiv <tile size>" patterns. 
+  oldMapOutput.walk([&pat](AffineExpr e) {
+    if (e.getKind() == AffineExprKind::Mod) {
+      AffineBinaryOpExpr binaryExpr = e.cast<AffineBinaryOpExpr>();
+      if (binaryExpr.getRHS().getKind() == AffineExprKind::Constant)
+        pat = TileExprPattern::TileMod;
+    } else if (e.getKind() == AffineExprKind::FloorDiv) {
+      AffineBinaryOpExpr binaryExpr = e.cast<AffineBinaryOpExpr>();
+      if (binaryExpr.getRHS().getKind() == AffineExprKind::Constant)
+        pat = TileExprPattern::TileFloorDiv;
+    }
+  });
+  // Create map output for the patterns.
+  // "floordiv <tile size>" ==> "ceildiv <tile size>"
+  // "mod <tile size>" ==> "<tile size>"
+  AffineExpr newMapOutput;
+  if (pat == TileExprPattern::TileMod) {
+    AffineBinaryOpExpr binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = binaryExpr.getRHS();
+  } else if (pat == TileExprPattern::TileFloorDiv) {
+    AffineBinaryOpExpr binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = getAffineBinaryOpExpr(
+        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
+  } else
+    newMapOutput = oldMapOutput;
+
+  return newMapOutput;
+}
+
+// Create new maps to calculate each dimension size of `newMemRefType`, and
+// create `newDynamicSizes` from them by using AffineApplyOp.
+static void createNewDynamicSizes(MemRefType oldMemRefType,
+                                  MemRefType newMemRefType, AffineMap map,
+                                  AllocOp allocOp, OpBuilder b,
+                                  SmallVectorImpl<Value> &newDynamicSizes) {
+  // Create new input for AffineApplyOp
+  SmallVector<Value, 4> inAffineApply;
+  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
+  unsigned dynIdx = 0;
+  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
+    if (oldMemRefShape[d] < 0) {
+      // Use dynamicSizes of allocOp for dynamic dimension
+      inAffineApply.emplace_back(allocOp.dynamicSizes()[dynIdx]);
+      dynIdx++;
+    } else {
+      // Create ConstantOp for static dimension.
+      Attribute constantAttr =
+          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
+      inAffineApply.emplace_back(
+          b.create<ConstantOp>(allocOp.getLoc(), constantAttr));
+    }
+  }
+
+  // Create new map to calculate each dimension size of new memref for each
+  // original map output. 
Only for dynamic dimension of `newMemRefType`
+  unsigned newDimIdx = 0;
+  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
+  for (AffineExpr expr : map.getResults()) {
+    if (newMemRefShape[newDimIdx] < 0) {
+      AffineExpr newMapOutput = getNewMapOutputTiledLayout(expr);
+      AffineMap newMap =
+          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
+      Value affineApp =
+          b.create<AffineApplyOp>(allocOp.getLoc(), newMap, inAffineApply);
+      newDynamicSizes.emplace_back(affineApp);
+    }
+    newDimIdx++;
+  }
+}
+ // TODO: Currently works for static memrefs with a single layout map. LogicalResult mlir::normalizeMemRef(AllocOp allocOp) { MemRefType memrefType = allocOp.getType(); @@ -396,9 +574,19 @@ Value oldMemRef = allocOp.getResult(); SmallVector<Value, 4> symbolOperands(allocOp.symbolOperands()); -  AllocOp newAlloc = b.create<AllocOp>(allocOp.getLoc(), newMemRefType, -                                       allocOp.alignmentAttr()); AffineMap layoutMap = memrefType.getAffineMaps().front(); +  AllocOp newAlloc; +  if (newMemRefType.getNumDynamicDims() > 0 && isTiledLayoutMap(layoutMap)) { +    MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>(); +    SmallVector<Value, 4> newDynamicSizes; +    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b, +                          newDynamicSizes); +    newAlloc = b.create<AllocOp>(allocOp.getLoc(), newMemRefType, +                                 newDynamicSizes, allocOp.alignmentAttr()); +  } else +    newAlloc = b.create<AllocOp>(allocOp.getLoc(), newMemRefType, +                                 allocOp.alignmentAttr()); + // Replace all uses of the old memref. if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc, /*extraIndices=*/{}, @@ -438,8 +626,11 @@ // We don't do any checks for one-to-one'ness; we assume that it is // one-to-one. -  // TODO: Only for static memref's for now. -  if (memrefType.getNumDynamicDims() > 0) +  // Normalize only static memrefs and dynamic memrefs with map for tiled layout +  // for now. +  // TODO: Normalize the other types of dynamic memrefs. 
+  if (memrefType.getNumDynamicDims() > 0 && +      !isTiledLayoutMap(layoutMaps.front())) return memrefType; // We have a single map that is not an identity map. Create a new memref @@ -447,9 +638,12 @@ ArrayRef<int64_t> shape = memrefType.getShape(); // FlatAffineConstraint may later on use symbolicOperands. FlatAffineConstraints fac(rank, numSymbolicOperands); +  SmallVector<unsigned, 4> memrefTypeDynDims; for (unsigned d = 0; d < rank; ++d) { fac.addConstantLowerBound(d, 0); fac.addConstantUpperBound(d, shape[d] - 1); +    if (shape[d] < 0) +      memrefTypeDynDims.emplace_back(d); } // We compose this map with the original index (logical) space to derive // the upper bounds for the new index space. @@ -462,15 +656,22 @@ fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds()); SmallVector<int64_t, 4> newShape(newRank); for (unsigned d = 0; d < newRank; ++d) { +    // Check if each dimension of normalized memrefType is dynamic. +    bool newDynDim = +        isNormalizedMemRefDynamicDim(d, layoutMap, memrefTypeDynDims, b); // The lower bound for the shape is always zero. auto ubConst = fac.getConstantUpperBound(d); // For a static memref and an affine map with no symbols, this is // always bounded. assert(ubConst.hasValue() && "should always have an upper bound"); -    if (ubConst.getValue() < 0) +    if (ubConst.getValue() < 0 && memrefType.getNumDynamicDims() == 0) // This is due to an invalid map that maps to a negative space. return memrefType; -    newShape[d] = ubConst.getValue() + 1; +    // If dimension of new memrefType is dynamic, the value is -1. +    if (newDynDim) +      newShape[d] = -1; +    else +      newShape[d] = ubConst.getValue() + 1; } // Create the new memref type after trivializing the old layout map. 
diff --git a/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir b/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Transforms/normalize-memrefs-ops-dynamic.mlir @@ -0,0 +1,116 @@ +// RUN: mlir-opt -normalize-memrefs %s | FileCheck %s + +// For all these cases, we test if MemRefs Normalization works with the test +// operations. These are test cases for MemRefs with dynamic dimension +// and tiled-layout map. +// * test.op_norm: this operation has the MemRefsNormalizable attribute. The tests +// that include this operation are constructed so that the normalization should +// happen. + +#map_tiled = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 floordiv 32, d3 floordiv 64, d2 mod 32, d3 mod 64)> +#map_not_tiled = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2, d2 mod 32, d3 mod 64)> + +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d2 ceildiv 32)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (32)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0)> +// CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d3 ceildiv 64)> +// CHECK-DAG: #[[$MAP5:.+]] = affine_map<(d0, d1, d2, d3) -> (64)> +// CHECK-DAG: #[[$MAP6:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2 - d1, d3 - d2, d2 mod 32, d3 mod 64)> + +// Test with op_norm and maps in arguments and in the operations in the function. 
+// Memref has two dynamic dimensions + +// CHECK-LABEL: test_norm_dynamic12 +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x1x?x64xf32>) { +func @test_norm_dynamic12(%arg0 : memref<1x?x?x14xf32, #map_tiled>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = dim %arg0, %c1 :memref<1x?x?x14xf32, #map_tiled> + %1 = dim %arg0, %c2 :memref<1x?x?x14xf32, #map_tiled> + %2 = alloc(%0, %1) : memref<1x?x?x14xf32, #map_tiled> + "test.op_norm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_tiled>, memref<1x?x?x14xf32, #map_tiled>) -> () + dealloc %2 : memref<1x?x?x14xf32, #map_tiled> + return + + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x1x?x64xf32> + // CHECK-DAG: [[DIM_1_:%.+]] = dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x1x?x64xf32> + // CHECK-DAG: [[CST_1_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_14_:%.+]] = constant 14 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[VAR_2_:%.+]] = affine.apply #[[$MAP0]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK-DAG: [[VAR_3_:%.+]] = affine.apply #[[$MAP1]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK-DAG: [[VAR_4_:%.+]] = affine.apply #[[$MAP2]]([[CST_1_1_]], [[DIM_0_]], [[DIM_1_]], [[CST_14_]]) + // CHECK: [[RES_:%.+]] = alloc([[VAR_2_]], [[VAR_3_]], [[VAR_4_]]) : memref<1x?x?x1x?x64xf32> + // CHECK: "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x1x?x64xf32>, memref<1x?x?x1x?x64xf32>) -> () + // CHECK: dealloc [[RES_]] : memref<1x?x?x1x?x64xf32> + // CHECK: return +} + +// Test with op_norm and maps in arguments and in the operations in the function. +// All of dimensions are dynamic. 
+
+// CHECK-LABEL: test_norm_dynamic1234
+// CHECK-SAME: ([[ARG_0_:%.+]]: memref<?x?x?x?x?x?xf32>) {
+func @test_norm_dynamic1234(%arg0 : memref<?x?x?x?xf32, #map_tiled>) -> () {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c3 = constant 3 : index
+  %0 = dim %arg0, %c0 :memref<?x?x?x?xf32, #map_tiled>
+  %1 = dim %arg0, %c1 :memref<?x?x?x?xf32, #map_tiled>
+  %2 = dim %arg0, %c2 :memref<?x?x?x?xf32, #map_tiled>
+  %3 = dim %arg0, %c3 :memref<?x?x?x?xf32, #map_tiled>
+  %4 = alloc(%0, %1, %2, %3) : memref<?x?x?x?xf32, #map_tiled>
+  "test.op_norm"(%arg0, %4) : (memref<?x?x?x?xf32, #map_tiled>, memref<?x?x?x?xf32, #map_tiled>) -> ()
+  dealloc %4 :  memref<?x?x?x?xf32, #map_tiled>
+  return
+  // CHECK-DAG:       [[CST_0_:%.+]] = constant 0 : index
+  // CHECK-DAG:       [[CST_1_:%.+]] = constant 1 : index
+  // CHECK-DAG:       [[CST_2_:%.+]] = constant 2 : index
+  // CHECK-DAG:       [[CST_3_:%.+]] = constant 3 : index
+  // CHECK-NOT: separator of consecutive DAGs
+  // CHECK-DAG:       [[DIM_0_:%.+]] = dim [[ARG_0_]], [[CST_0_]] : memref<?x?x?x?x?x?xf32>
+  // CHECK-DAG:       [[DIM_1_:%.+]] = dim [[ARG_0_]], [[CST_1_]] : memref<?x?x?x?x?x?xf32>
+  // CHECK-DAG:       [[DIM_2_:%.+]] = dim [[ARG_0_]], [[CST_2_]] : memref<?x?x?x?x?x?xf32>
+  // CHECK-DAG:       [[DIM_3_:%.+]] = dim [[ARG_0_]], [[CST_3_]] : memref<?x?x?x?x?x?xf32>
+  // CHECK-NOT: separator of consecutive DAGs
+  // CHECK-DAG:       [[VAR_4_:%.+]] = affine.apply #[[$MAP3]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK-DAG:       [[VAR_5_:%.+]] = affine.apply #[[$MAP0]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK-DAG:       [[VAR_6_:%.+]] = affine.apply #[[$MAP1]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK-DAG:       [[VAR_7_:%.+]] = affine.apply #[[$MAP4]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK-DAG:       [[VAR_8_:%.+]] = affine.apply #[[$MAP2]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK-DAG:       [[VAR_9_:%.+]] = affine.apply #[[$MAP5]]([[DIM_0_]], [[DIM_1_]], [[DIM_2_]], [[DIM_3_]])
+  // CHECK:           [[RES_:%.+]] = alloc([[VAR_4_]], [[VAR_5_]], [[VAR_6_]], [[VAR_7_]], [[VAR_8_]], [[VAR_9_]]) : memref<?x?x?x?x?x?xf32>
+  // CHECK:           "test.op_norm"([[ARG_0_]], [[RES_]]) : (memref<?x?x?x?x?x?xf32>, memref<?x?x?x?x?x?xf32>) -> ()
+  // CHECK:           dealloc [[RES_]] : memref<?x?x?x?x?x?xf32>
+  // CHECK:           return
+}
+
+// Same test with maps of not tiled layout in the 
arguments and the operations in the function. +// This is not normalized since only tiled-layout map is supported. + +// CHECK-LABEL: func @test_norm_dynamic_not_tiled +// CHECK-SAME: ([[ARG_0_:%.+]]: memref<1x?x?x14xf32, #[[$MAP6]]>) { +func @test_norm_dynamic_not_tiled(%arg0 : memref<1x?x?x14xf32, #map_not_tiled>) -> () { + %c1 = constant 1 : index + %c2 = constant 2 : index + %0 = dim %arg0, %c1 :memref<1x?x?x14xf32, #map_not_tiled> + %1 = dim %arg0, %c2 :memref<1x?x?x14xf32, #map_not_tiled> + %2 = alloc(%0, %1) : memref<1x?x?x14xf32, #map_not_tiled> + "test.op_nonnorm"(%arg0, %2) : (memref<1x?x?x14xf32, #map_not_tiled>, memref<1x?x?x14xf32, #map_not_tiled>) -> () + dealloc %2 : memref<1x?x?x14xf32, #map_not_tiled> + return + // CHECK-DAG: [[CST_1_:%.+]] = constant 1 : index + // CHECK-DAG: [[CST_2_:%.+]] = constant 2 : index + // CHECK-NOT: separator of consecutive DAGs + // CHECK-DAG: [[DIM_0_:%.+]] = dim [[ARG_0_]], [[CST_1_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK-DAG: [[DIM_1_:%.+]] = dim [[ARG_0_]], [[CST_2_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: [[RES_:%.+]] = alloc([[DIM_0_]], [[DIM_1_]]) : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: "test.op_nonnorm"([[ARG_0_]], [[RES_]]) : (memref<1x?x?x14xf32, #[[$MAP6]]>, memref<1x?x?x14xf32, #[[$MAP6]]>) -> () + // CHECK: dealloc [[RES_]] : memref<1x?x?x14xf32, #[[$MAP6]]> + // CHECK: return +}