Index: clang/docs/tools/clang-formatted-files.txt
===================================================================
--- clang/docs/tools/clang-formatted-files.txt
+++ clang/docs/tools/clang-formatted-files.txt
@@ -2302,6 +2302,7 @@
 flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp
 flang/lib/Optimizer/Transforms/PassDetail.h
 flang/lib/Optimizer/Transforms/RewriteLoop.cpp
+flang/lib/Optimizer/Transforms/StackArrays.cpp
 flang/lib/Parser/basic-parsers.h
 flang/lib/Parser/char-block.cpp
 flang/lib/Parser/char-buffer.cpp
Index: flang/include/flang/Optimizer/Builder/MutableBox.h
===================================================================
--- flang/include/flang/Optimizer/Builder/MutableBox.h
+++ flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -123,8 +123,8 @@
 void genInlinedAllocation(fir::FirOpBuilder &builder, mlir::Location loc,
                           const fir::MutableBoxValue &box,
                           mlir::ValueRange lbounds, mlir::ValueRange extents,
-                          mlir::ValueRange lenParams,
-                          llvm::StringRef allocName);
+                          mlir::ValueRange lenParams, llvm::StringRef allocName,
+                          bool mustBeHeap = false);
 
 void genInlinedDeallocate(fir::FirOpBuilder &builder, mlir::Location loc,
                           const fir::MutableBoxValue &box);
Index: flang/include/flang/Optimizer/Transforms/Passes.h
===================================================================
--- flang/include/flang/Optimizer/Transforms/Passes.h
+++ flang/include/flang/Optimizer/Transforms/Passes.h
@@ -54,6 +54,7 @@
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
+std::unique_ptr<mlir::Pass> createStackArraysPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
 std::unique_ptr<mlir::Pass>
Index: flang/include/flang/Optimizer/Transforms/Passes.td
===================================================================
--- flang/include/flang/Optimizer/Transforms/Passes.td
+++ flang/include/flang/Optimizer/Transforms/Passes.td
@@ -216,6 +216,16 @@
   let constructor = "::fir::createMemoryAllocationPass()";
 }
 
+def StackArrays : Pass<"stack-arrays", "mlir::func::FuncOp"> {
+  let summary = "Move local array allocations from heap memory into stack memory";
+  let description = [{
+    Convert heap allocations for arrays, even those of unknown size, into stack
+    allocations.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let constructor = "::fir::createStackArraysPass()";
+}
+
 def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
   let summary = "Region simplification";
   let description = [{
Index: flang/lib/Lower/Allocatable.cpp
===================================================================
--- flang/lib/Lower/Allocatable.cpp
+++ flang/lib/Lower/Allocatable.cpp
@@ -365,7 +365,8 @@
     }
   }
   fir::factory::genInlinedAllocation(builder, loc, box, lbounds, extents,
-                                     lenParams, mangleAlloc(alloc));
+                                     lenParams, mangleAlloc(alloc),
+                                     /*mustBeHeap=*/true);
 }
 
 void genSimpleAllocation(const Allocation &alloc,
Index: flang/lib/Optimizer/Builder/MutableBox.cpp
===================================================================
--- flang/lib/Optimizer/Builder/MutableBox.cpp
+++ flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -710,13 +710,11 @@
   return newStorage;
 }
 
-void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
-                                        mlir::Location loc,
-                                        const fir::MutableBoxValue &box,
-                                        mlir::ValueRange lbounds,
-                                        mlir::ValueRange extents,
-                                        mlir::ValueRange lenParams,
-                                        llvm::StringRef allocName) {
+void fir::factory::genInlinedAllocation(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const fir::MutableBoxValue &box, mlir::ValueRange lbounds,
+    mlir::ValueRange extents, mlir::ValueRange lenParams,
+    llvm::StringRef allocName, bool mustBeHeap) {
   auto lengths = getNewLengths(builder, loc, box, lenParams);
   llvm::SmallVector<mlir::Value> safeExtents;
   for (mlir::Value extent : extents)
@@ -733,6 +731,7 @@
     mlir::Value irBox = fir::factory::getMutableIRBox(builder, loc, box);
     fir::runtime::genDerivedTypeInitialize(builder, loc, irBox);
   }
+  heap->setAttr("fir.must_be_heap", builder.getBoolAttr(mustBeHeap));
 }
 
 void fir::factory::genInlinedDeallocate(fir::FirOpBuilder &builder,
Index: flang/lib/Optimizer/Transforms/CMakeLists.txt
===================================================================
--- flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@
   ArrayValueCopy.cpp
   ExternalNameConversion.cpp
   MemoryAllocation.cpp
+  StackArrays.cpp
   MemRefDataFlowOpt.cpp
   SimplifyRegionLite.cpp
   AlgebraicSimplification.cpp
Index: flang/lib/Optimizer/Transforms/StackArrays.cpp
===================================================================
--- /dev/null
+++ flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -0,0 +1,407 @@
+//===- StackArrays.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace fir {
+#define GEN_PASS_DEF_STACKARRAYS
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "stack-arrays"
+
+namespace {
+
+/// The state of an SSA value at each program point
+enum class AllocationState {
+  /// This means that the allocation state of a variable cannot be determined
+  /// at this program point, e.g. because one route through a conditional freed
+  /// the variable and the other route didn't.
+  /// This asserts a known-unknown: different from the unknown-unknown of
+  /// having no AllocationState stored for a particular SSA value
+  Unknown,
+  /// Means this SSA value was allocated on the heap in this function and has
+  /// now been freed
+  Freed,
+  /// Means this SSA value was allocated on the heap in this function and is a
+  /// candidate for moving to the stack
+  Allocated,
+};
+
+static void print(llvm::raw_ostream &os, AllocationState state) {
+  switch (state) {
+  case AllocationState::Unknown:
+    os << "Unknown";
+    break;
+  case AllocationState::Freed:
+    os << "Freed";
+    break;
+  case AllocationState::Allocated:
+    os << "Allocated";
+    break;
+  }
+}
+
+/// Join two AllocationStates for the same value coming from different CFG
+/// blocks
+AllocationState join(AllocationState lhs, AllocationState rhs) {
+  //           | Allocated | Freed     | Unknown
+  // ========= | ========= | ========= | =========
+  // Allocated | Allocated | Unknown   | Allocated
+  // Freed     | Unknown   | Freed     | Unknown
+  // Unknown   | Allocated | Unknown   | Unknown
+  if (lhs == rhs)
+    return lhs;
+  if ((lhs == AllocationState::Unknown && rhs == AllocationState::Allocated) ||
+      (lhs == AllocationState::Allocated && rhs == AllocationState::Unknown)) {
+    // On some path through this function we do an allocation. This is a
+    // candidate for a function-scope stack allocation so long as we are sure
+    // it gets freed by the time we return
+    return AllocationState::Allocated;
+  }
+  return AllocationState::Unknown;
+}
+
+/// Maps SSA values to their AllocationState at a particular program point
+class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
+  llvm::DenseMap<mlir::Value, AllocationState> map;
+
+public:
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
+  using AbstractDenseLattice::AbstractDenseLattice;
+
+  bool operator==(const LatticePoint &rhs) const { return map == rhs.map; }
+
+  /// Join the lattice across control-flow edges
+  virtual mlir::ChangeResult
+  join(const AbstractDenseLattice &lattice) override {
+    const auto &rhs = static_cast<const LatticePoint &>(lattice);
+    mlir::ChangeResult changed = mlir::ChangeResult::NoChange;
+
+    // add everything from rhs to map, handling cases where values are in both
+    for (const auto &[value, rhsState] : rhs.map) {
+      auto it = map.find(value);
+      if (it != map.end()) {
+        // value is present in both maps
+        AllocationState myState = it->second;
+        AllocationState newState = ::join(myState, rhsState);
+        if (newState != myState) {
+          changed = mlir::ChangeResult::Change;
+          it->getSecond() = newState;
+        }
+      } else {
+        // value not present in current map: add it
+        map.insert({value, rhsState});
+        changed = mlir::ChangeResult::Change;
+      }
+    }
+
+    return changed;
+  }
+
+  void print(llvm::raw_ostream &os) const override {
+    for (const auto &[value, state] : map) {
+      os << value << ": ";
+      ::print(os, state);
+    }
+  }
+
+  /// Clear all modifications
+  mlir::ChangeResult reset() {
+    if (map.empty())
+      return mlir::ChangeResult::NoChange;
+    map.clear();
+    return mlir::ChangeResult::Change;
+  }
+
+  /// Set the state of an SSA value
+  mlir::ChangeResult set(mlir::Value value, AllocationState state) {
+    if (map.count(value)) {
+      // already in map
+      AllocationState &oldState = map[value];
+      if (oldState != state) {
+        map[value] = state;
+        return mlir::ChangeResult::Change;
+      }
+      return mlir::ChangeResult::NoChange;
+    }
+    map.insert({value, state});
+    return mlir::ChangeResult::Change;
+  }
+
+  /// Get values which were allocated in this function and always freed before
+  /// the function returns
+  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const {
+    for (auto &[value, state] : map) {
+      if (state == AllocationState::Freed) {
+        out.insert(value);
+      }
+    }
+  }
+
+  std::optional<AllocationState> get(mlir::Value val) const {
+    auto it = map.find(val);
+    if (it == map.end())
+      return {};
+    return it->second;
+  }
+};
+
+class AllocationAnalysis
+    : public mlir::dataflow::DenseDataFlowAnalysis<LatticePoint> {
+public:
+  using DenseDataFlowAnalysis::DenseDataFlowAnalysis;
+
+  void visitOperation(mlir::Operation *op, const LatticePoint &before,
+                      LatticePoint *after) override {
+    LLVM_DEBUG(llvm::dbgs()
+               << "StackArrays: Visiting operation: " << *op << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
+
+    // propagate before -> after
+    mlir::ChangeResult changed = after->join(before);
+
+    if (auto allocmem = llvm::dyn_cast<fir::AllocMemOp>(op)) {
+      assert(op->getNumResults() == 1 && "fir.allocmem has one result");
+      auto attr = op->getAttrOfType<mlir::BoolAttr>("fir.must_be_heap");
+      if (attr && attr.getValue()) {
+        LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
+        // skip allocation marked not to be moved
+        return;
+      }
+
+      auto retTy = allocmem.getAllocatedType();
+      if (!retTy.isa<fir::SequenceType>()) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "--Allocation is not for an array: skipping\n");
skipping\n"); + return; + } + } else if (llvm::isa(op)) { + assert(op->getNumOperands() == 1 && "fir.freemem has one operand"); + mlir::Value operand = op->getOperand(0); + std::optional operandState = before.get(operand); + if (operandState && *operandState == AllocationState::Allocated) { + // don't tag things not allocated in this function as freed, so that we + // don't think they are candidates for moving to the stack + changed |= after->set(operand, AllocationState::Freed); + } + } + + LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n"); + propagateIfChanged(after, changed); + } + + /// At an entry point, the last modifications of all memory resources are + /// yet to be determined + void setToEntryState(LatticePoint *lattice) override { + propagateIfChanged(lattice, lattice->reset()); + } +}; + +/// Converts a fir.allocmem to a fir.alloca +class AllocMemConversion : public mlir::OpRewritePattern { +private: + /// allocmem operations that DFA has determined are safe to move to the stack + const llvm::DenseSet &candidateOps; + + /// Determine where to insert the alloca operation. The returned value should + /// be checked to see if it is inside a loop + static mlir::Operation *findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { + // Ideally the alloca should be inserted at the end of the function entry + // block so that we do not allocate stack space in a loop. However, + // the operands to the alloca may not be available that early, so insert it + // after the last operand becomes available + LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: " + << oldAlloc << "\n"); + + // Find when the last operand value becomes available + mlir::Block *operandsBlock = nullptr; + mlir::Operation *lastOperand = nullptr; + for (mlir::Value operand : oldAlloc.getOperands()) { + LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n"); + mlir::Operation *op = operand.getDefiningOp(); + if (!operandsBlock) + operandsBlock = op->getBlock(); + else if (operandsBlock != op->getBlock()) { + LLVM_DEBUG(llvm::dbgs() + << "----operand declared in a different block!\n"); + // Operation::isBeforeInBlock requires the operations to be in the same + // block. The best we can do is the location of the allocmem. 
+        return oldAlloc.getOperation();
+      }
+      if (!lastOperand || lastOperand->isBeforeInBlock(op))
+        lastOperand = op;
+    }
+
+    if (lastOperand) {
+      // there were value operands to the allocmem so insert after the last one
+      LLVM_DEBUG(llvm::dbgs()
+                 << "--Placing after last operand: " << *lastOperand << "\n");
+      return lastOperand;
+    }
+
+    // There were no value operands to the allocmem so we are safe to insert it
+    // at the start of the function entry block
+    mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
+    assert(func && "This analysis is run on func.func");
+    mlir::Block &entryBlock = func.getBlocks().front();
+    LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
+    return &entryBlock.getOperations().front();
+  }
+
+  /// Returns the alloca if it was successfully inserted, otherwise {}
+  static std::optional<fir::AllocaOp>
+  insertAlloca(fir::AllocMemOp &oldAlloc, mlir::PatternRewriter &rewriter) {
+    mlir::Operation *insertionPoint = findAllocaInsertionPoint(oldAlloc);
+    if (!insertionPoint)
+      return {};
+
+    // check that we are not going to insert the fir.alloca into a loop
+    if (insertionPoint->getParentOfType<mlir::LoopLikeOpInterface>())
+      return {};
+
+    mlir::Location loc = oldAlloc.getLoc();
+    mlir::Type varTy = oldAlloc.getInType();
+    auto unpackName =
+        [](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
+      if (opt)
+        return *opt;
+      return {};
+    };
+    llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName());
+    llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName());
+
+    rewriter.setInsertionPointAfter(insertionPoint);
+    return rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName,
+                                          oldAlloc.getTypeparams(),
+                                          oldAlloc.getShape());
+  }
+
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  AllocMemConversion(mlir::MLIRContext *ctx,
+                     const llvm::DenseSet<mlir::Operation *> &candidateOps)
+      : OpRewritePattern(ctx), candidateOps(candidateOps) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::AllocMemOp allocmem,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (!candidateOps.count(allocmem.getOperation())) {
+      // This allocmem was not identified as being safe to move to the stack
+      return mlir::failure();
+    }
+
+    auto oldInsertionPt = rewriter.saveInsertionPoint();
+    // add alloca operation
+    std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
+    rewriter.restoreInsertionPoint(oldInsertionPt);
+    if (!alloca)
+      return mlir::failure();
+
+    // remove freemem operations
+    for (mlir::Operation *user : allocmem.getOperation()->getUsers()) {
+      if (mlir::isa<fir::FreeMemOp>(user)) {
+        rewriter.eraseOp(user);
+      }
+    }
+
+    // replace references to heap allocation with references to stack
+    // allocation
+    rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());
+
+    // remove allocmem operation
+    rewriter.eraseOp(allocmem.getOperation());
+
+    return mlir::success();
+  }
+};
+
+class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
+public:
+  StackArraysPass() = default;
+  StackArraysPass(const StackArraysPass &pass) {}
+
+  llvm::StringRef getDescription() const override {
+    return "Move heap allocated array temporaries to the stack";
+  }
+
+  Statistic runCount{this, "stackArraysRunCount",
+                     "Number of heap arrays moved to the stack"};
+
+  void runOnOperation() override {
+    // TODO: move this to analysis
+    mlir::Operation *op = getOperation();
+    mlir::func::FuncOp func = mlir::dyn_cast<mlir::func::FuncOp>(op);
+    assert(func && "This pass is defined as walking func.func");
+
+    mlir::DataFlowSolver solver;
+    solver.load<mlir::dataflow::DeadCodeAnalysis>(); // required to mark blocks
+                                                     // live
+    solver.load<AllocationAnalysis>();
+    if (failed(solver.initializeAndRun(op))) {
+      llvm::errs() << "DataFlowSolver failed!";
"DataFlowSolver failed!"; + return signalPassFailure(); + } + + llvm::DenseSet candidateValues; + func.walk([&](fir::FreeMemOp child) { + const LatticePoint *lattice = solver.lookupState(child); + // there will be no lattice for an unreachable block + if (lattice) { + lattice->appendFreedValues(candidateValues); + } + }); + + llvm::DenseSet candidateOps; + candidateOps.reserve(candidateValues.size()); + for (const mlir::Value &val : candidateValues) { + ++runCount; + LLVM_DEBUG(llvm::dbgs() + << "StackArrays: Found candidate value: " << val << "\n"); + candidateOps.insert(val.getDefiningOp()); + } + + mlir::MLIRContext &context = getContext(); + mlir::RewritePatternSet patterns(&context); + mlir::ConversionTarget target(context); + + target.addLegalDialect(); + + patterns.insert(&context, candidateOps); + if (mlir::failed( + mlir::applyPartialConversion(func, target, std::move(patterns)))) { + mlir::emitError(func.getLoc(), "error in stack arrays optimization\n"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr fir::createStackArraysPass() { + return std::make_unique(); +}