diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -34,6 +34,7 @@ std::unique_ptr createExternalNameConversionPass(); std::unique_ptr createMemDataFlowOptPass(); std::unique_ptr createPromoteToAffinePass(); +std::unique_ptr createMemoryAllocationPass(); /// Support for inlining on FIR. bool canLegallyInline(mlir::Operation *op, mlir::Region *reg, diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -152,4 +152,22 @@ ]; } +def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::FuncOp"> { + let summary = "Convert stack to heap allocations and vice versa."; + let description = [{ + Convert stack allocations to heap allocations and vice versa based on + estimated size, lifetime, usage patterns, the call tree, etc. + }]; + let dependentDialects = [ "fir::FIROpsDialect" ]; + let options = [ + Option<"dynamicArrayOnHeap", "dynamic-array-on-heap", + "bool", /*default=*/"false", + "Allocate all arrays with runtime determined size on heap.">, + Option<"maxStackArraySize", "maximum-array-alloc-size", + "std::size_t", /*default=*/"~static_cast(0)", + "Set maximum number of elements of an array allocated on the stack."> + ]; + let constructor = "::fir::createMemoryAllocationPass()"; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -6,6 +6,7 @@ ArrayValueCopy.cpp Inliner.cpp ExternalNameConversion.cpp + MemoryAllocation.cpp MemRefDataFlowOpt.cpp RewriteLoop.cpp diff --git a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp @@ -0,0 +1,186 @@ +//===- MemoryAllocation.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/ADT/TypeSwitch.h" + +#define DEBUG_TYPE "flang-memory-allocation-opt" + +// Number of elements in an array does not determine where it is allocated. +static constexpr std::size_t UnlimitedArraySize = ~static_cast(0); + +namespace { +struct MemoryAllocationOptions { + // Always move dynamic array allocations to the heap. This may result in more + // heap fragmentation, so may impact performance negatively. + bool dynamicArrayOnHeap = false; + + // Number of elements in array threshold for moving to heap. In environments + // with limited stack size, moving large arrays to the heap can avoid running + // out of stack space. + std::size_t maxStackArraySize = UnlimitedArraySize; +}; + +class ReturnAnalysis { +public: + ReturnAnalysis(mlir::Operation *op) { + if (auto func = mlir::dyn_cast(op)) + for (mlir::Block &block : func) + for (mlir::Operation &i : block) + if (mlir::isa(i)) { + returnMap[op].push_back(&i); + break; + } + } + + llvm::SmallVector getReturns(mlir::Operation *func) const { + auto iter = returnMap.find(func); + if (iter != returnMap.end()) + return iter->second; + return {}; + } + +private: + llvm::DenseMap> + returnMap; +}; +} // namespace + +/// Return `true` if this allocation is to remain on the stack (`fir.alloca`). +/// Otherwise the allocation should be moved to the heap (`fir.allocmem`). +static inline bool keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry, + const MemoryAllocationOptions &options) { + // Limitation: only arrays allocated on the stack in the entry block are + // considered for now. + // TODO: Generalize the algorithm and placement of the freemem nodes. + if (alloca->getBlock() != entry) + return true; + if (auto seqTy = alloca.getInType().dyn_cast()) { + if (fir::hasDynamicSize(seqTy)) { + // Move all arrays with runtime determined size to the heap. + if (options.dynamicArrayOnHeap) + return false; + } else { + std::int64_t numberOfElements = 1; + for (std::int64_t i : seqTy.getShape()) { + numberOfElements *= i; + // If the count is suspicious, then don't change anything here. + if (numberOfElements <= 0) + return true; + } + // If the number of elements exceeds the threshold, move the allocation to + // the heap. + if (static_cast(numberOfElements) > + options.maxStackArraySize) { + LLVM_DEBUG(llvm::dbgs() + << "memory allocation opt: found " << alloca << '\n'); + return false; + } + } + } + return true; +} + +namespace { +class AllocaOpConversion : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + AllocaOpConversion(mlir::MLIRContext *ctx, + llvm::ArrayRef rets) + : OpRewritePattern(ctx), returnOps(rets) {} + + mlir::LogicalResult + matchAndRewrite(fir::AllocaOp alloca, + mlir::PatternRewriter &rewriter) const override { + auto loc = alloca.getLoc(); + mlir::Type varTy = alloca.getInType(); + auto unpackName = + [](llvm::Optional opt) -> llvm::StringRef { + if (opt) + return *opt; + return {}; + }; + auto uniqName = unpackName(alloca.uniq_name()); + auto bindcName = unpackName(alloca.bindc_name()); + auto heap = rewriter.create( + loc, varTy, uniqName, bindcName, alloca.typeparams(), alloca.shape()); + auto insPt = rewriter.saveInsertionPoint(); + for (mlir::Operation *retOp : returnOps) { + rewriter.setInsertionPoint(retOp); + [[maybe_unused]] auto free = rewriter.create(loc, heap); + LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free + << " for " << heap << '\n'); + } + rewriter.restoreInsertionPoint(insPt); + rewriter.replaceOpWithNewOp( + alloca, fir::ReferenceType::get(varTy), heap); + LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca + << " with " << heap << '\n'); + return mlir::success(); + } + +private: + llvm::ArrayRef returnOps; +}; + +/// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based +/// on heuristics and settings. The intention is to allow better performance and +/// workarounds for conditions such as environments with limited stack space. +/// +/// Currently, implements two conversions from stack to heap allocation. +/// 1. If a stack allocation is an array larger than some threshold value +/// make it a heap allocation. +/// 2. If a stack allocation is an array with a runtime evaluated size make +/// it a heap allocation. +class MemoryAllocationOpt + : public fir::MemoryAllocationOptBase { +public: + void runOnOperation() override { + auto *context = &getContext(); + auto func = getOperation(); + mlir::OwningRewritePatternList patterns(context); + mlir::ConversionTarget target(*context); + MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(), + maxStackArraySize.getValue()}; + + // If func is a declaration, skip it. + if (func.empty()) + return; + + const auto &analysis = getAnalysis(); + + target.addLegalDialect(); + target.addDynamicallyLegalOp([&](fir::AllocaOp alloca) { + return keepStackAllocation(alloca, &func.front(), options); + }); + + patterns.insert(context, analysis.getReturns(func)); + if (mlir::failed( + mlir::applyPartialConversion(func, target, std::move(patterns)))) { + mlir::emitError(func.getLoc(), + "error in memory allocation optimization\n"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr fir::createMemoryAllocationPass() { + return std::make_unique(); +} diff --git a/flang/test/Fir/memory-allocation-opt.fir b/flang/test/Fir/memory-allocation-opt.fir new file mode 100644 --- /dev/null +++ b/flang/test/Fir/memory-allocation-opt.fir @@ -0,0 +1,34 @@ +// RUN: fir-opt --memory-allocation-opt="dynamic-array-on-heap=true maximum-array-alloc-size=1024" %s | FileCheck %s + +// Test for size of array being too big. + +// CHECK-LABEL: func @_QPs1( +// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"} +// CHECK: fir.call @_QPs3( +// CHECK: fir.freemem %[[mem]] +// CHECK-NEXT: return + +func @_QPs1() { + %0 = fir.alloca !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"} + fir.call @_QPs3(%0) : (!fir.ref>) -> () + return +} + +// Test for dynamic array. + +// CHECK-LABEL: func @_QPs2( +// CHECK: %[[mem:.*]] = fir.allocmem !fir.array, %{{[0-9]+}} {bindc_name = "array", uniq_name = "_QFs2Earray"} +// CHECK: fir.call @_QPs3( +// CHECK: fir.freemem %[[mem]] +// CHECK-NEXT: return + +func @_QPs2(%arg0: !fir.ref) { + %0 = fir.load %arg0 : !fir.ref + %1 = fir.convert %0 : (i32) -> index + %2 = fir.alloca !fir.array, %1 {bindc_name = "array", uniq_name = "_QFs2Earray"} + %3 = fir.convert %2 : (!fir.ref>) -> !fir.ref> + fir.call @_QPs3(%3) : (!fir.ref>) -> () + return +} +func private @_QPs3(!fir.ref>) +