diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -36,7 +36,7 @@ createLinalgTilingToParallelLoopsPass(ArrayRef tileSizes = {}); std::unique_ptr> -createLinalgPromotionPass(bool dynamicBuffers); +createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca); std::unique_ptr> createLinalgPromotionPass(); /// Create a pass to convert Linalg operations to scf.for loops and diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -61,7 +61,9 @@ let constructor = "mlir::createLinalgPromotionPass()"; let options = [ Option<"dynamicBuffers", "test-promote-dynamic", "bool", - /*default=*/"false", "Test generation of dynamic promoted buffers"> + /*default=*/"false", "Test generation of dynamic promoted buffers">, + Option<"useAlloca", "test-use-alloca", "bool", + /*default=*/"false", "Test generation of alloca'ed buffers."> ]; } diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -122,6 +122,12 @@ alignment = align; return *this; } + /// Use alloca with the default allocation scheme. + bool useAlloca = false; + LinalgPromotionOptions &setUseAlloca(bool use) { + useAlloca = use; + return *this; + } /// Callback function to do the allocation of the promoted buffer. If None, /// then the default allocation scheme of allocating a memref buffer /// followed by a view operation is used. @@ -134,7 +140,6 @@ deallocationFn = deallocFn; return *this; } - /// Callback function to do the copy of data to and from the promoted /// subview. If None then a linalg.copy is used. Optional copyInFn = None; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -67,7 +67,8 @@ /// Alloc a new buffer of `size`. If `dynamicBuffers` is true allocate exactly /// the size needed, otherwise try to allocate a static bounding box. -static Value allocBuffer(Type elementType, Value size, bool dynamicBuffers, +static Value allocBuffer(const LinalgPromotionOptions &options, + Type elementType, Value size, bool dynamicBuffers, OperationFolder *folder, Optional alignment = None) { auto *ctx = size.getContext(); @@ -78,23 +79,34 @@ IntegerAttr::get(IntegerType::get(64, ctx), alignment.getValue()); if (!dynamicBuffers) if (auto cst = size.getDefiningOp()) - return std_alloc( - MemRefType::get(width * cst.getValue(), IntegerType::get(8, ctx)), - ValueRange{}, alignment_attr); + return options.useAlloca + ? std_alloca(MemRefType::get(width * cst.getValue(), + IntegerType::get(8, ctx)), + ValueRange{}, alignment_attr) + .value + : std_alloc(MemRefType::get(width * cst.getValue(), + IntegerType::get(8, ctx)), + ValueRange{}, alignment_attr) + .value; Value mul = folded_std_muli(folder, folded_std_constant_index(folder, width), size); - return std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, - alignment_attr); + return options.useAlloca + ? std_alloca(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, + alignment_attr) + .value + : std_alloc(MemRefType::get(-1, IntegerType::get(8, ctx)), mul, + alignment_attr) + .value; } /// Default allocation callback function. This allocates a promoted buffer when /// no call back to do so is provided. The default is to allocate a /// memref<..xi8> and return a view to get a memref type of shape /// boundingSubViewSize. -static Optional -allocBufferCallBack(OpBuilder &builder, SubViewOp subView, - ArrayRef boundingSubViewSize, bool dynamicBuffers, - Optional alignment, OperationFolder *folder) { +static Optional defaultAllocBufferCallBack( + const LinalgPromotionOptions &options, OpBuilder &builder, + SubViewOp subView, ArrayRef boundingSubViewSize, bool dynamicBuffers, + Optional alignment, OperationFolder *folder) { ShapedType viewType = subView.getType(); int64_t rank = viewType.getRank(); (void)rank; @@ -105,7 +117,7 @@ Value allocSize = one; for (auto size : llvm::enumerate(boundingSubViewSize)) allocSize = folded_std_muli(folder, allocSize, size.value()); - Value buffer = allocBuffer(viewType.getElementType(), allocSize, + Value buffer = allocBuffer(options, viewType.getElementType(), allocSize, dynamicBuffers, folder, alignment); SmallVector dynSizes(boundingSubViewSize.size(), ShapedType::kDynamicSize); @@ -118,10 +130,13 @@ /// Default implementation of deallocation of the buffer use for promotion. It /// expects to get the same value that the default allocation method returned, /// i.e. result of a ViewOp. -static LogicalResult deallocCallBack(OpBuilder &b, Value fullLocalView) { +static LogicalResult +defaultDeallocBufferCallBack(const LinalgPromotionOptions &options, + OpBuilder &b, Value fullLocalView) { auto viewOp = fullLocalView.getDefiningOp(); assert(viewOp && "expected full local view to be a ViewOp"); - std_dealloc(viewOp.source()); + if (!options.useAlloca) + std_dealloc(viewOp.source()); return success(); } @@ -182,11 +197,16 @@ : [&](OpBuilder &builder, SubViewOp subViewOp, ArrayRef boundingSubViewSize, OperationFolder *folder) -> Optional { - return allocBufferCallBack(builder, subViewOp, boundingSubViewSize, - dynamicBuffers, alignment, folder); + return defaultAllocBufferCallBack(options, builder, subViewOp, + boundingSubViewSize, dynamicBuffers, + alignment, folder); }); deallocationFn = - (options.deallocationFn ? *(options.deallocationFn) : deallocCallBack); + (options.deallocationFn + ? *(options.deallocationFn) + : [&](OpBuilder &b, Value buffer) { + return defaultDeallocBufferCallBack(options, b, buffer); + }); auto defaultCopyCallBack = [&](OpBuilder &builder, Value src, Value dst) -> LogicalResult { linalg_copy(src, dst); @@ -344,9 +364,8 @@ } // 4. Dealloc all local buffers. - for (const auto &pi : *promotedBuffersAndViews) { + for (const auto &pi : *promotedBuffersAndViews) options.deallocationFn(b, pi.second.fullLocalView); - } return op; } @@ -383,14 +402,17 @@ namespace { struct LinalgPromotionPass : public LinalgPromotionBase { LinalgPromotionPass() = default; - LinalgPromotionPass(bool dynamicBuffers) { + LinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { this->dynamicBuffers = dynamicBuffers; + this->useAlloca = useAlloca; } void runOnFunction() override { OperationFolder folder(&getContext()); getFunction().walk([this, &folder](LinalgOp op) { - auto options = LinalgPromotionOptions().setDynamicBuffers(dynamicBuffers); + auto options = LinalgPromotionOptions() + .setDynamicBuffers(dynamicBuffers) + .setUseAlloca(useAlloca); if (failed(promoteSubviewsPrecondition(op, options))) return; LLVM_DEBUG(llvm::dbgs() << "Promote: " << *(op.getOperation()) << "\n"); @@ -403,8 +425,8 @@ // TODO: support more transformation options in the pass. std::unique_ptr> -mlir::createLinalgPromotionPass(bool dynamicBuffers) { - return std::make_unique(dynamicBuffers); +mlir::createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca) { + return std::make_unique(dynamicBuffers, useAlloca); } std::unique_ptr> mlir::createLinalgPromotionPass() { return std::make_unique(); diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -linalg-promote-subviews | FileCheck %s // RUN: mlir-opt %s -linalg-promote-subviews="test-promote-dynamic" | FileCheck %s --check-prefix=DYNAMIC +// RUN: mlir-opt %s -linalg-promote-subviews="test-use-alloca" | FileCheck %s --check-prefix=ALLOCA #map1 = affine_map<(d0) -> (d0 + 2)> #map2 = affine_map<(d0) -> (d0 + 4)> @@ -45,16 +46,19 @@ // CHECK: %[[vC:.*]] = subview {{.*}} : memref /// // CHECK: %[[tmpA:.*]] = alloc() : memref<32xi8> +// ALLOCA: %[[tmpA:.*]] = alloca() : memref<32xi8> // CHECK: %[[fullA:.*]] = std.view %[[tmpA]][{{.*}}][{{.*}}] : memref<32xi8> to memref // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialA:.*]] = subview %[[fullA]]{{.*}} : memref to memref /// // CHECK: %[[tmpB:.*]] = alloc() : memref<48xi8> +// ALLOCA: %[[tmpB:.*]] = alloca() : memref<48xi8> // CHECK: %[[fullB:.*]] = std.view %[[tmpB]][{{.*}}][{{.*}}] : memref<48xi8> to memref // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialB:.*]] = subview %[[fullB]]{{.*}} : memref to memref /// // CHECK: %[[tmpC:.*]] = alloc() : memref<24xi8> +// ALLOCA: %[[tmpC:.*]] = alloca() : memref<24xi8> // CHECK: %[[fullC:.*]] = std.view %[[tmpC]][{{.*}}][{{.*}}] : memref<24xi8> to memref // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC:.*]] = subview %[[fullC]]{{.*}} : memref to memref @@ -75,6 +79,9 @@ // CHECK: dealloc %[[tmpA]] : memref<32xi8> // CHECK: dealloc %[[tmpB]] : memref<48xi8> // CHECK: dealloc %[[tmpC]] : memref<24xi8> +// ALLOCA-NOT: dealloc %[[tmpA]] : memref<32xi8> +// ALLOCA-NOT: dealloc %[[tmpB]] : memref<48xi8> +// ALLOCA-NOT: dealloc %[[tmpC]] : memref<24xi8> // -----