Index: clang/docs/tools/clang-formatted-files.txt
===================================================================
--- clang/docs/tools/clang-formatted-files.txt
+++ clang/docs/tools/clang-formatted-files.txt
@@ -2302,6 +2302,7 @@
 flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp
 flang/lib/Optimizer/Transforms/PassDetail.h
 flang/lib/Optimizer/Transforms/RewriteLoop.cpp
+flang/lib/Optimizer/Transforms/StackArrays.cpp
 flang/lib/Parser/basic-parsers.h
 flang/lib/Parser/char-block.cpp
 flang/lib/Parser/char-buffer.cpp
Index: flang/include/flang/Optimizer/Builder/MutableBox.h
===================================================================
--- flang/include/flang/Optimizer/Builder/MutableBox.h
+++ flang/include/flang/Optimizer/Builder/MutableBox.h
@@ -123,8 +123,8 @@
 void genInlinedAllocation(fir::FirOpBuilder &builder, mlir::Location loc,
                           const fir::MutableBoxValue &box,
                           mlir::ValueRange lbounds, mlir::ValueRange extents,
-                          mlir::ValueRange lenParams,
-                          llvm::StringRef allocName);
+                          mlir::ValueRange lenParams, llvm::StringRef allocName,
+                          bool mustBeHeap = false);
 
 void genInlinedDeallocate(fir::FirOpBuilder &builder, mlir::Location loc,
                           const fir::MutableBoxValue &box);
Index: flang/include/flang/Optimizer/Transforms/Passes.h
===================================================================
--- flang/include/flang/Optimizer/Transforms/Passes.h
+++ flang/include/flang/Optimizer/Transforms/Passes.h
@@ -54,6 +54,7 @@
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
+std::unique_ptr<mlir::Pass> createStackArraysPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
 std::unique_ptr<mlir::Pass>
Index: flang/include/flang/Optimizer/Transforms/Passes.td
===================================================================
--- flang/include/flang/Optimizer/Transforms/Passes.td
+++ flang/include/flang/Optimizer/Transforms/Passes.td
@@ -216,6 +216,16 @@
   let constructor = "::fir::createMemoryAllocationPass()";
 }
 
+def StackArrays : Pass<"stack-arrays", "mlir::func::FuncOp"> {
+  let summary = "Move local array allocations from heap memory into stack memory";
+  let description = [{
+    Convert heap allocations for arrays, even those of unknown size, into stack
+    allocations.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let constructor = "::fir::createStackArraysPass()";
+}
+
 def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
   let summary = "Region simplification";
   let description = [{
Index: flang/lib/Lower/Allocatable.cpp
===================================================================
--- flang/lib/Lower/Allocatable.cpp
+++ flang/lib/Lower/Allocatable.cpp
@@ -365,7 +365,8 @@
     }
   }
   fir::factory::genInlinedAllocation(builder, loc, box, lbounds, extents,
-                                     lenParams, mangleAlloc(alloc));
+                                     lenParams, mangleAlloc(alloc),
+                                     /*mustBeHeap=*/true);
 }
 
 void genSimpleAllocation(const Allocation &alloc,
Index: flang/lib/Optimizer/Builder/MutableBox.cpp
===================================================================
--- flang/lib/Optimizer/Builder/MutableBox.cpp
+++ flang/lib/Optimizer/Builder/MutableBox.cpp
@@ -710,13 +710,11 @@
   return newStorage;
 }
 
-void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
-                                        mlir::Location loc,
-                                        const fir::MutableBoxValue &box,
-                                        mlir::ValueRange lbounds,
-                                        mlir::ValueRange extents,
-                                        mlir::ValueRange lenParams,
-                                        llvm::StringRef allocName) {
+void fir::factory::genInlinedAllocation(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const fir::MutableBoxValue &box, mlir::ValueRange lbounds,
+    mlir::ValueRange extents, mlir::ValueRange lenParams,
+    llvm::StringRef allocName, bool mustBeHeap) {
   auto lengths = getNewLengths(builder, loc, box, lenParams);
   llvm::SmallVector<mlir::Value> safeExtents;
   for (mlir::Value extent : extents)
@@ -733,6 +731,7 @@
     mlir::Value irBox = fir::factory::getMutableIRBox(builder, loc, box);
     fir::runtime::genDerivedTypeInitialize(builder, loc, irBox);
   }
+  heap->setAttr("fir.must_be_heap", builder.getBoolAttr(mustBeHeap));
 }
 
 void fir::factory::genInlinedDeallocate(fir::FirOpBuilder &builder,
Index: flang/lib/Optimizer/Transforms/CMakeLists.txt
===================================================================
--- flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@
   ArrayValueCopy.cpp
   ExternalNameConversion.cpp
   MemoryAllocation.cpp
+  StackArrays.cpp
   MemRefDataFlowOpt.cpp
   SimplifyRegionLite.cpp
   AlgebraicSimplification.cpp
Index: flang/lib/Optimizer/Transforms/StackArrays.cpp
===================================================================
--- /dev/null
+++ flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -0,0 +1,407 @@
+//===- StackArrays.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace fir {
+#define GEN_PASS_DEF_STACKARRAYS
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "stack-arrays"
+
+namespace {
+
+/// The state of an SSA value at each program point
+enum class AllocationState {
+  /// This means that the allocation state of a variable cannot be determined
+  /// at this program point, e.g. because one route through a conditional freed
+  /// the variable and the other route didn't.
+  /// This asserts a known-unknown: different from the unknown-unknown of
+  /// having no AllocationState stored for a particular SSA value
+  Unknown,
+  /// Means this SSA value was allocated on the heap in this function and has
+  /// now been freed
+  Freed,
+  /// Means this SSA value was allocated on the heap in this function and is a
+  /// candidate for moving to the stack
+  Allocated,
+};
+
+static void print(llvm::raw_ostream &os, AllocationState state) {
+  switch (state) {
+  case AllocationState::Unknown:
+    os << "Unknown";
+    break;
+  case AllocationState::Freed:
+    os << "Freed";
+    break;
+  case AllocationState::Allocated:
+    os << "Allocated";
+    break;
+  }
+}
+
+/// Join two AllocationStates for the same value coming from different CFG
+/// blocks
+AllocationState join(AllocationState lhs, AllocationState rhs) {
+  //           | Allocated | Freed     | Unknown
+  // ========= | ========= | ========= | =========
+  // Allocated | Allocated | Unknown   | Allocated
+  // Freed     | Unknown   | Freed     | Unknown
+  // Unknown   | Allocated | Unknown   | Unknown
+  if (lhs == rhs)
+    return lhs;
+  if ((lhs == AllocationState::Unknown && rhs == AllocationState::Allocated) ||
+      (lhs == AllocationState::Allocated && rhs == AllocationState::Unknown)) {
+    // On some path through this function we do an allocation. This is a
+    // candidate for a function-scope stack allocation so long as we are sure
+    // it gets freed by the time we return
+    return AllocationState::Allocated;
+  }
+  return AllocationState::Unknown;
+}
+
+/// Maps SSA values to their AllocationState at a particular program point
+class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
+  llvm::DenseMap<mlir::Value, AllocationState> map;
+
+public:
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
+  using AbstractDenseLattice::AbstractDenseLattice;
+
+  bool operator==(const LatticePoint &rhs) const { return map == rhs.map; }
+
+  /// Join the lattice across control-flow edges
+  virtual mlir::ChangeResult
+  join(const AbstractDenseLattice &lattice) override {
+    const auto &rhs = static_cast<const LatticePoint &>(lattice);
+    mlir::ChangeResult changed = mlir::ChangeResult::NoChange;
+
+    // add everything from rhs to map, handling cases where values are in both
+    for (const auto &[value, rhsState] : rhs.map) {
+      auto it = map.find(value);
+      if (it != map.end()) {
+        // value is present in both maps
+        AllocationState myState = it->second;
+        AllocationState newState = ::join(myState, rhsState);
+        if (newState != myState) {
+          changed = mlir::ChangeResult::Change;
+          it->getSecond() = newState;
+        }
+      } else {
+        // value not present in current map: add it
+        map.insert({value, rhsState});
+        changed = mlir::ChangeResult::Change;
+      }
+    }
+
+    return changed;
+  }
+
+  void print(llvm::raw_ostream &os) const override {
+    for (const auto &[value, state] : map) {
+      os << value << ": ";
+      ::print(os, state);
+    }
+  }
+
+  /// Clear all modifications
+  mlir::ChangeResult reset() {
+    if (map.empty())
+      return mlir::ChangeResult::NoChange;
+    map.clear();
+    return mlir::ChangeResult::Change;
+  }
+
+  /// Set the state of an SSA value
+  mlir::ChangeResult set(mlir::Value value, AllocationState state) {
+    if (map.count(value)) {
+      // already in map
+      AllocationState &oldState = map[value];
+      if (oldState != state) {
+        map[value] = state;
+        return mlir::ChangeResult::Change;
+      }
+      return mlir::ChangeResult::NoChange;
+    }
+    map.insert({value, state});
+    return mlir::ChangeResult::Change;
+  }
+
+  /// Get values which were allocated in this function and always freed before
+  /// the function returns
+  void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const {
+    for (auto &[value, state] : map) {
+      if (state == AllocationState::Freed) {
+        out.insert(value);
+      }
+    }
+  }
+
+  std::optional<AllocationState> get(mlir::Value val) const {
+    auto it = map.find(val);
+    if (it == map.end())
+      return {};
+    return it->second;
+  }
+};
+
+class AllocationAnalysis
+    : public mlir::dataflow::DenseDataFlowAnalysis<LatticePoint> {
+public:
+  using DenseDataFlowAnalysis::DenseDataFlowAnalysis;
+
+  void visitOperation(mlir::Operation *op, const LatticePoint &before,
+                      LatticePoint *after) override {
+    LLVM_DEBUG(llvm::dbgs()
+               << "StackArrays: Visiting operation: " << *op << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");
+
+    // propagate before -> after
+    mlir::ChangeResult changed = after->join(before);
+
+    if (auto allocmem = llvm::dyn_cast<fir::AllocMemOp>(op)) {
+      assert(op->getNumResults() == 1 && "fir.allocmem has one result");
+      auto attr = op->getAttrOfType<mlir::BoolAttr>("fir.must_be_heap");
+      if (attr && attr.getValue()) {
+        LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
+        // skip allocation marked not to be moved
+        return;
+      }
+
+      auto retTy = allocmem.getAllocatedType();
+      if (!retTy.isa<fir::SequenceType>()) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "--Allocation is not for an array: skipping\n");
skipping\n"); + return; + } + } else if (llvm::isa(op)) { + assert(op->getNumOperands() == 1 && "fir.freemem has one operand"); + mlir::Value operand = op->getOperand(0); + std::optional operandState = before.get(operand); + if (operandState && *operandState == AllocationState::Allocated) { + // don't tag things not allocated in this function as freed, so that we + // don't think they are candidates for moving to the stack + changed |= after->set(operand, AllocationState::Freed); + } + } + + LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n"); + propagateIfChanged(after, changed); + } + + /// At an entry point, the last modifications of all memory resources are + /// yet to be determined + void setToEntryState(LatticePoint *lattice) override { + propagateIfChanged(lattice, lattice->reset()); + } +}; + +/// Converts a fir.allocmem to a fir.alloca +class AllocMemConversion : public mlir::OpRewritePattern { +private: + /// allocmem operations that DFA has determined are safe to move to the stack + const llvm::DenseSet &candidateOps; + + /// Determine where to insert the alloca operation. The returned value should + /// be checked to see if it is inside a loop + static mlir::Operation *findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { + // Ideally the alloca should be inserted at the end of the function entry + // block so that we do not allocate stack space in a loop. However, + // the operands to the alloca may not be available that early, so insert it + // after the last operand becomes available + LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: " + << oldAlloc << "\n"); + + // Find when the last operand value becomes available + mlir::Block *operandsBlock = nullptr; + mlir::Operation *lastOperand = nullptr; + for (mlir::Value operand : oldAlloc.getOperands()) { + LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n"); + mlir::Operation *op = operand.getDefiningOp(); + if (!operandsBlock) + operandsBlock = op->getBlock(); + else if (operandsBlock != op->getBlock()) { + LLVM_DEBUG(llvm::dbgs() + << "----operand declared in a different block!\n"); + // Operation::isBeforeInBlock requires the operations to be in the same + // block. The best we can do is the location of the allocmem. 
+        return oldAlloc.getOperation();
+      }
+      if (!lastOperand || lastOperand->isBeforeInBlock(op))
+        lastOperand = op;
+    }
+
+    if (lastOperand) {
+      // there were value operands to the allocmem so insert after the last one
+      LLVM_DEBUG(llvm::dbgs()
+                 << "--Placing after last operand: " << *lastOperand << "\n");
+      return lastOperand;
+    }
+
+    // There were no value operands to the allocmem so we are safe to insert it
+    // at the start of the function entry block
+    mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
+    assert(func && "This analysis is run on func.func");
+    mlir::Block &entryBlock = func.getBlocks().front();
+    LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
+    return &entryBlock.getOperations().front();
+  }
+
+  /// Returns the alloca if it was successfully inserted, otherwise {}
+  static std::optional<fir::AllocaOp>
+  insertAlloca(fir::AllocMemOp &oldAlloc, mlir::PatternRewriter &rewriter) {
+    mlir::Operation *insertionPoint = findAllocaInsertionPoint(oldAlloc);
+    if (!insertionPoint)
+      return {};
+
+    // check that we are not going to insert the fir.alloca into a loop
+    if (insertionPoint->getParentOfType<mlir::LoopLikeOpInterface>())
+      return {};
+
+    mlir::Location loc = oldAlloc.getLoc();
+    mlir::Type varTy = oldAlloc.getInType();
+    auto unpackName =
+        [](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
+      if (opt)
+        return *opt;
+      return {};
+    };
+    llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName());
+    llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName());
+
+    rewriter.setInsertionPointAfter(insertionPoint);
+    return rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName,
+                                          oldAlloc.getTypeparams(),
+                                          oldAlloc.getShape());
+  }
+
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  AllocMemConversion(mlir::MLIRContext *ctx,
+                     const llvm::DenseSet<mlir::Operation *> &candidateOps)
+      : OpRewritePattern(ctx), candidateOps(candidateOps) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::AllocMemOp allocmem,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (!candidateOps.count(allocmem.getOperation())) {
+      // This allocmem was not identified as being safe to move to the stack
+      return mlir::failure();
+    }
+
+    auto oldInsertionPt = rewriter.saveInsertionPoint();
+    // add alloca operation
+    std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
+    rewriter.restoreInsertionPoint(oldInsertionPt);
+    if (!alloca)
+      return mlir::failure();
+
+    // remove freemem operations
+    for (mlir::Operation *user : allocmem.getOperation()->getUsers()) {
+      if (mlir::isa<fir::FreeMemOp>(user)) {
+        rewriter.eraseOp(user);
+      }
+    }
+
+    // replace references to heap allocation with references to stack
+    // allocation
+    rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());
+
+    // remove allocmem operation
+    rewriter.eraseOp(allocmem.getOperation());
+
+    return mlir::success();
+  }
+};
+
+class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
+public:
+  StackArraysPass() = default;
+  StackArraysPass(const StackArraysPass &pass) {}
+
+  llvm::StringRef getDescription() const override {
+    return "Move heap allocated array temporaries to the stack";
+  }
+
+  Statistic runCount{this, "stackArraysRunCount",
+                     "Number of heap arrays moved to the stack"};
+
+  void runOnOperation() override {
+    // TODO: move this to analysis
+    mlir::Operation *op = getOperation();
+    mlir::func::FuncOp func = mlir::dyn_cast<mlir::func::FuncOp>(op);
+    assert(func && "This pass is defined as walking func.func");
+
+    mlir::DataFlowSolver solver;
+    solver.load<mlir::dataflow::DeadCodeAnalysis>(); // required to mark blocks
+                                                     // live
+    solver.load<AllocationAnalysis>();
+    if (failed(solver.initializeAndRun(op))) {
+      llvm::errs() << "DataFlowSolver failed!";
"DataFlowSolver failed!"; + return signalPassFailure(); + } + + llvm::DenseSet candidateValues; + func.walk([&](fir::FreeMemOp child) { + const LatticePoint *lattice = solver.lookupState(child); + // there will be no lattice for an unreachable block + if (lattice) { + lattice->appendFreedValues(candidateValues); + } + }); + + llvm::DenseSet candidateOps; + candidateOps.reserve(candidateValues.size()); + for (const mlir::Value &val : candidateValues) { + ++runCount; + LLVM_DEBUG(llvm::dbgs() + << "StackArrays: Found candidate value: " << val << "\n"); + candidateOps.insert(val.getDefiningOp()); + } + + mlir::MLIRContext &context = getContext(); + mlir::RewritePatternSet patterns(&context); + mlir::ConversionTarget target(context); + + target.addLegalDialect(); + + patterns.insert(&context, candidateOps); + if (mlir::failed( + mlir::applyPartialConversion(func, target, std::move(patterns)))) { + mlir::emitError(func.getLoc(), "error in stack arrays optimization\n"); + signalPassFailure(); + } + } +}; +} // namespace + +std::unique_ptr fir::createStackArraysPass() { + return std::make_unique(); +}