This is an archive of the discontinued LLVM Phabricator instance.

Reapply: [flang] use greedy mlir driver for stack arrays pass
ClosedPublic

Authored by tblah on May 30 2023, 8:53 AM.

Download Raw Diff

Details

Reviewers

vzakhari
clementval
nicolasvasilache

Commits

rG408f4196ba4a: [flang] use greedy mlir driver for stack arrays pass

Summary

In upstream mlir, the dialect conversion infrastructure is used for
lowering from one dialect to another: the passes are of the form
XToYPass. Whereas, transformations within the same dialect tend to use
applyPatternsAndFoldGreedily.

In this case, the full complexity of applyPatternsAndFoldGreedily isn't
needed so we can get away with the simpler applyOpPatternsAndFold.

This change was suggested by @jeanPerier

The old differential revision for this patch was
https://reviews.llvm.org/D150853

Re-applying here with a fix for the issue which led to the patch being reverted. The
issue was caused by erasing uses of the allocation operation while still iterating
over those uses (leading to a use-after-free). I have added a regression
test which catches this bug for -fsanitize=address builds, but it is
hard to reliably cause a crash from the use-after-free in normal builds.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

tblah created this revision.May 30 2023, 8:53 AM

Herald added projects: Restricted Project, Restricted Project. · View Herald TranscriptMay 30 2023, 8:53 AM

Herald added subscribers: sunshaoce, bzcheeseman, mehdi_amini, rriddle. · View Herald Transcript

tblah requested review of this revision.May 30 2023, 8:53 AM

Herald added subscribers: stephenneuendorffer, jdoerfert. · View Herald TranscriptMay 30 2023, 8:53 AM

tblah retitled this revision from [flang] use greedy mlir driver for stack arrays pass to Reapply: [flang] use greedy mlir driver for stack arrays pass.May 30 2023, 8:53 AM

Harbormaster completed remote builds in B235328: Diff 526658.May 30 2023, 10:28 AM

LGTM. Thank you!

This revision is now accepted and ready to land.May 30 2023, 12:01 PM

Closed by commit rG408f4196ba4a: [flang] use greedy mlir driver for stack arrays pass (authored by tblah). · Explain WhyMay 31 2023, 7:14 AM

This revision was automatically updated to reflect the committed changes.

tblah added a commit: rG408f4196ba4a: [flang] use greedy mlir driver for stack arrays pass.

Revision Contents

Path

Size

flang/

lib/

Optimizer/

Transforms/

StackArrays.cpp

81 lines

test/

Transforms/

stack-arrays.fir

27 lines

Diff 527026

flang/lib/Optimizer/Transforms/StackArrays.cpp

Show All 20 Lines
#include "mlir/Dialect/Func/IR/FuncOps.h"		#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"		#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Builders.h"		#include "mlir/IR/Builders.h"
#include "mlir/IR/Diagnostics.h"		#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Value.h"		#include "mlir/IR/Value.h"
#include "mlir/Interfaces/LoopLikeInterface.h"		#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Pass/Pass.h"		#include "mlir/Pass/Pass.h"
#include "mlir/Support/LogicalResult.h"		#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"		#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"		#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerUnion.h"		#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"		#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include <optional>		#include <optional>

▲ Show 20 Lines • Show All 124 Lines • ▼ Show 20 Lines
public:		public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)		MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)

// Maps fir.allocmem -> place to insert alloca		// Maps fir.allocmem -> place to insert alloca
using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;		using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;

StackArraysAnalysisWrapper(mlir::Operation *op) {}		StackArraysAnalysisWrapper(mlir::Operation *op) {}

bool hasErrors() const;		// returns nullptr if analysis failed
		const AllocMemMap *getCandidateOps(mlir::Operation *func);
const AllocMemMap &getCandidateOps(mlir::Operation *func);

private:		private:
llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;		llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;
bool gotError = false;

void analyseFunction(mlir::Operation *func);		mlir::LogicalResult analyseFunction(mlir::Operation *func);
};		};

/// Converts a fir.allocmem to a fir.alloca		/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {		class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:		public:
using OpRewritePattern::OpRewritePattern;		explicit AllocMemConversion(

AllocMemConversion(
mlir::MLIRContext *ctx,		mlir::MLIRContext *ctx,
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps);		const StackArraysAnalysisWrapper::AllocMemMap &candidateOps)
		: OpRewritePattern(ctx), candidateOps{candidateOps} {}

mlir::LogicalResult		mlir::LogicalResult
matchAndRewrite(fir::AllocMemOp allocmem,		matchAndRewrite(fir::AllocMemOp allocmem,
mlir::PatternRewriter &rewriter) const override;		mlir::PatternRewriter &rewriter) const override;

/// Determine where to insert the alloca operation. The returned value should		/// Determine where to insert the alloca operation. The returned value should
/// be checked to see if it is inside a loop		/// be checked to see if it is inside a loop
static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc);		static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc);

private:		private:
/// allocmem operations that DFA has determined are safe to move to the stack		/// Handle to the DFA (already run)
/// mapping to where to insert replacement freemem operations		const StackArraysAnalysisWrapper::AllocMemMap &candidateOps;
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps;

/// If we failed to find an insertion point not inside a loop, see if it would		/// If we failed to find an insertion point not inside a loop, see if it would
/// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop		/// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc);		static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc);

/// Returns the alloca if it was successfully inserted, otherwise {}		/// Returns the alloca if it was successfully inserted, otherwise {}
std::optional<fir::AllocaOp>		std::optional<fir::AllocaOp>
insertAlloca(fir::AllocMemOp &oldAlloc,		insertAlloca(fir::AllocMemOp &oldAlloc,
▲ Show 20 Lines • Show All 197 Lines • ▼ Show 20 Lines	if (mlir::Operation *prev = op->getPrevNode())
before = getLatticeFor(op, prev);		before = getLatticeFor(op, prev);
else		else
before = getLatticeFor(op, op->getBlock());		before = getLatticeFor(op, op->getBlock());

/// Invoke the operation transfer function		/// Invoke the operation transfer function
visitOperationImpl(op, *before, after);		visitOperationImpl(op, *before, after);
}		}

void StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {		mlir::LogicalResult
		StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
assert(mlir::isa<mlir::func::FuncOp>(func));		assert(mlir::isa<mlir::func::FuncOp>(func));
mlir::DataFlowSolver solver;		mlir::DataFlowSolver solver;
// constant propagation is required for dead code analysis, dead code analysis		// constant propagation is required for dead code analysis, dead code analysis
// is required to mark blocks live (required for mlir dense dfa)		// is required to mark blocks live (required for mlir dense dfa)
solver.load<mlir::dataflow::SparseConstantPropagation>();		solver.load<mlir::dataflow::SparseConstantPropagation>();
solver.load<mlir::dataflow::DeadCodeAnalysis>();		solver.load<mlir::dataflow::DeadCodeAnalysis>();

auto [it, inserted] = funcMaps.try_emplace(func);		auto [it, inserted] = funcMaps.try_emplace(func);
AllocMemMap &candidateOps = it->second;		AllocMemMap &candidateOps = it->second;

solver.load<AllocationAnalysis>();		solver.load<AllocationAnalysis>();
if (failed(solver.initializeAndRun(func))) {		if (failed(solver.initializeAndRun(func))) {
llvm::errs() << "DataFlowSolver failed!";		llvm::errs() << "DataFlowSolver failed!";
gotError = true;		return mlir::failure();
return;
}		}

LatticePoint point{func};		LatticePoint point{func};
auto joinOperationLattice = [&](mlir::Operation *op) {		auto joinOperationLattice = [&](mlir::Operation *op) {
const LatticePoint *lattice = solver.lookupState<LatticePoint>(op);		const LatticePoint *lattice = solver.lookupState<LatticePoint>(op);
// there will be no lattice for an unreachable block		// there will be no lattice for an unreachable block
if (lattice)		if (lattice)
point.join(*lattice);		point.join(*lattice);
Show All 14 Lines	for (mlir::Value freedValue : freedValues) {
if (insertionPoint)		if (insertionPoint)
candidateOps.insert({allocmem, insertionPoint});		candidateOps.insert({allocmem, insertionPoint});
}		}

LLVM_DEBUG(for (auto [allocMemOp, _]		LLVM_DEBUG(for (auto [allocMemOp, _]
: candidateOps) {		: candidateOps) {
llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';		llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
});		});
		return mlir::success();
}		}

bool StackArraysAnalysisWrapper::hasErrors() const { return gotError; }		const StackArraysAnalysisWrapper::AllocMemMap *

const StackArraysAnalysisWrapper::AllocMemMap &
StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) {		StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) {
if (!funcMaps.count(func))		if (!funcMaps.contains(func))
analyseFunction(func);		if (mlir::failed(analyseFunction(func)))
return funcMaps[func];		return nullptr;
		return &funcMaps[func];
}		}

AllocMemConversion::AllocMemConversion(
mlir::MLIRContext *ctx,
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps)
: OpRewritePattern(ctx), candidateOps(candidateOps) {}

mlir::LogicalResult		mlir::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,		AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
mlir::PatternRewriter &rewriter) const {		mlir::PatternRewriter &rewriter) const {
auto oldInsertionPt = rewriter.saveInsertionPoint();		auto oldInsertionPt = rewriter.saveInsertionPoint();
// add alloca operation		// add alloca operation
std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);		std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
rewriter.restoreInsertionPoint(oldInsertionPt);		rewriter.restoreInsertionPoint(oldInsertionPt);
if (!alloca)		if (!alloca)
return mlir::failure();		return mlir::failure();

// remove freemem operations		// remove freemem operations
		llvm::SmallVector<mlir::Operation *> erases;
for (mlir::Operation *user : allocmem.getOperation()->getUsers())		for (mlir::Operation *user : allocmem.getOperation()->getUsers())
if (mlir::isa<fir::FreeMemOp>(user))		if (mlir::isa<fir::FreeMemOp>(user))
rewriter.eraseOp(user);		erases.push_back(user);
		// now we are done iterating the users, it is safe to mutate them
		for (mlir::Operation *erase : erases)
		rewriter.eraseOp(erase);

// replace references to heap allocation with references to stack allocation		// replace references to heap allocation with references to stack allocation
rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());		rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());

// remove allocmem operation		// remove allocmem operation
rewriter.eraseOp(allocmem.getOperation());		rewriter.eraseOp(allocmem.getOperation());

return mlir::success();		return mlir::success();
▲ Show 20 Lines • Show All 205 Lines • ▼ Show 20 Lines	void StackArraysPass::runOnOperation() {

mod.walk([this](mlir::func::FuncOp func) { runOnFunc(func); });		mod.walk([this](mlir::func::FuncOp func) { runOnFunc(func); });
}		}

void StackArraysPass::runOnFunc(mlir::Operation *func) {		void StackArraysPass::runOnFunc(mlir::Operation *func) {
assert(mlir::isa<mlir::func::FuncOp>(func));		assert(mlir::isa<mlir::func::FuncOp>(func));

auto &analysis = getAnalysis<StackArraysAnalysisWrapper>();		auto &analysis = getAnalysis<StackArraysAnalysisWrapper>();
const auto &candidateOps = analysis.getCandidateOps(func);		const StackArraysAnalysisWrapper::AllocMemMap *candidateOps =
if (analysis.hasErrors()) {		analysis.getCandidateOps(func);
		if (!candidateOps) {
signalPassFailure();		signalPassFailure();
return;		return;
}		}

if (candidateOps.empty())		if (candidateOps->empty())
return;		return;
runCount += candidateOps.size();		runCount += candidateOps->size();

		llvm::SmallVector<mlir::Operation *> opsToConvert;
		opsToConvert.reserve(candidateOps->size());
		for (auto [op, _] : *candidateOps)
		opsToConvert.push_back(op);

mlir::MLIRContext &context = getContext();		mlir::MLIRContext &context = getContext();
mlir::RewritePatternSet patterns(&context);		mlir::RewritePatternSet patterns(&context);
mlir::ConversionTarget target(context);		mlir::GreedyRewriteConfig config;
		// prevent the pattern driver from merging blocks
target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,		config.enableRegionSimplification = false;
mlir::func::FuncDialect>();
target.addDynamicallyLegalOp<fir::AllocMemOp>([&](fir::AllocMemOp alloc) {		patterns.insert<AllocMemConversion>(&context, *candidateOps);
return !candidateOps.count(alloc.getOperation());		if (mlir::failed(mlir::applyOpPatternsAndFold(opsToConvert,
});		std::move(patterns), config))) {

patterns.insert<AllocMemConversion>(&context, candidateOps);
if (mlir::failed(
mlir::applyPartialConversion(func, target, std::move(patterns)))) {
mlir::emitError(func->getLoc(), "error in stack arrays optimization\n");		mlir::emitError(func->getLoc(), "error in stack arrays optimization\n");
signalPassFailure();		signalPassFailure();
}		}
}		}

std::unique_ptr<mlir::Pass> fir::createStackArraysPass() {		std::unique_ptr<mlir::Pass> fir::createStackArraysPass() {
return std::make_unique<StackArraysPass>();		return std::make_unique<StackArraysPass>();
}		}

flang/test/Transforms/stack-arrays.fir

	Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines
	// CHECK: func.func @dfa3(%arg0: i1) {			// CHECK: func.func @dfa3(%arg0: i1) {
	// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<1xi8>			// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<1xi8>
	// CHECK-NEXT: fir.if %arg0 {			// CHECK-NEXT: fir.if %arg0 {
	// CHECK-NEXT: } else {			// CHECK-NEXT: } else {
	// CHECK-NEXT: }			// CHECK-NEXT: }
	// CHECK-NEXT: return			// CHECK-NEXT: return
	// CHECK-NEXT: }			// CHECK-NEXT: }

				func.func private @dfa3a_foo(!fir.ref<!fir.array<1xi8>>) -> ()
				func.func private @dfa3a_bar(!fir.ref<!fir.array<1xi8>>) -> ()

				// Check freemem in both regions, with other uses
				func.func @dfa3a(%arg0: i1) {
				%a = fir.allocmem !fir.array<1xi8>
				fir.if %arg0 {
				%ref = fir.convert %a : (!fir.heap<!fir.array<1xi8>>) -> !fir.ref<!fir.array<1xi8>>
				func.call @dfa3a_foo(%ref) : (!fir.ref<!fir.array<1xi8>>) -> ()
				fir.freemem %a : !fir.heap<!fir.array<1xi8>>
				} else {
				%ref = fir.convert %a : (!fir.heap<!fir.array<1xi8>>) -> !fir.ref<!fir.array<1xi8>>
				func.call @dfa3a_bar(%ref) : (!fir.ref<!fir.array<1xi8>>) -> ()
				fir.freemem %a : !fir.heap<!fir.array<1xi8>>
				}
				return
				}
				// CHECK: func.func @dfa3a(%arg0: i1) {
				// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<1xi8>
				// CHECK-NEXT: fir.if %arg0 {
				// CHECK-NEXT: func.call @dfa3a_foo(%[[MEM]])
				// CHECK-NEXT: } else {
				// CHECK-NEXT: func.call @dfa3a_bar(%[[MEM]])
				// CHECK-NEXT: }
				// CHECK-NEXT: return
				// CHECK-NEXT: }

	// check the alloca is placed after all operands become available			// check the alloca is placed after all operands become available
	func.func @placement1() {			func.func @placement1() {
	// do some stuff with other ssa values			// do some stuff with other ssa values
	%1 = arith.constant 1 : index			%1 = arith.constant 1 : index
	%2 = arith.constant 2 : index			%2 = arith.constant 2 : index
	%3 = arith.addi %1, %2 : index			%3 = arith.addi %1, %2 : index
	// operand is now available			// operand is now available
	%4 = fir.allocmem !fir.array<?xi32>, %3			%4 = fir.allocmem !fir.array<?xi32>, %3
	▲ Show 20 Lines • Show All 250 Lines • Show Last 20 Lines