diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -834,6 +834,8 @@
     custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) ` `
     `(` $dynamicSizes `)` (`` `[` $symbolOperands^ `]`)? attr-dict `:` type($memref)
   }];
+
+  let hasCanonicalizer = 1;
 }
 
 def GPU_DeallocOp : GPU_Op<"dealloc", [GPU_AsyncOpInterface]> {
@@ -1040,7 +1042,7 @@
     the same value.
 
     This op is meant to be used along with `gpu.subgroup_mma_compute`.
-    
+
     Example:
 
     ```mlir
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1089,6 +1089,55 @@
   return foldMemRefCast(*this);
 }
 
+//===----------------------------------------------------------------------===//
+// GPU_AllocOp
+//===----------------------------------------------------------------------===//
+namespace {
+
+/// Canonicalization pattern that folds `memref.dim(gpu.alloc(%size), %idx)`
+/// to `%size`, mirroring the equivalent fold for `memref::AllocOp`.
+///
+/// The pattern applies only when `%idx` is a constant index that names a
+/// dynamic dimension of a memref defined directly by `gpu.alloc`; the `dim` op
+/// is then replaced by the matching `dynamicSizes` operand of the allocation.
+struct SimplifyDimOfAllocOp : public OpRewritePattern<memref::DimOp> {
+  using OpRewritePattern<memref::DimOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::DimOp dimOp,
+                                PatternRewriter &rewriter) const override {
+    // Only a compile-time-known dimension index can be folded.
+    auto index = dimOp.index().getDefiningOp<ConstantIndexOp>();
+    if (!index)
+      return failure();
+
+    // Defensive range check before querying the type with the index.
+    const int64_t dim = index.getValue();
+    auto memrefType = dimOp.source().getType().dyn_cast<MemRefType>();
+    if (!memrefType || dim < 0 || dim >= memrefType.getRank() ||
+        !memrefType.isDynamicDim(dim))
+      return failure();
+
+    auto alloc = dimOp.source().getDefiningOp<AllocOp>();
+    if (!alloc)
+      return failure();
+
+    // `dynamicSizes` is indexed by dynamic-dimension position, so the
+    // dimension index is first mapped through `getDynamicDimIndex`.
+    Value substituteOp =
+        *(alloc.dynamicSizes().begin() + memrefType.getDynamicDimIndex(dim));
+    rewriter.replaceOp(dimOp, substituteOp);
+    return success();
+  }
+};
+
+} // end anonymous namespace.
+
+/// Registers the canonicalization patterns of `gpu.alloc` into `results`.
+void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                          MLIRContext *context) {
+  results.add<SimplifyDimOfAllocOp>(context);
+}
+
 #include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc"
 
 #define GET_OP_CLASSES
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -9,3 +9,16 @@
   gpu.memcpy %0,%1 : memref<?xf32>, memref<?xf32>
   return
 }
+
+// -----
+
+// Test case: Folding of memref.dim(gpu.alloc(%size), %idx) -> %size
+// CHECK-LABEL: func @gpu_dim_of_alloc(
+//  CHECK-SAME: %[[SIZE:[0-9a-z]+]]: index
+//  CHECK-NEXT: return %[[SIZE]] : index
+func @gpu_dim_of_alloc(%size: index) -> index {
+  %0 = gpu.alloc(%size) : memref<?xindex>
+  %c0 = constant 0 : index
+  %1 = memref.dim %0, %c0 : memref<?xindex>
+  return %1 : index
+}