Diff 252767

mlir/include/mlir/InitAllPasses.h

Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines	inline void registerAllPasses() {
createConvertLinalgToParallelLoopsPass();		createConvertLinalgToParallelLoopsPass();
createConvertLinalgToAffineLoopsPass();		createConvertLinalgToAffineLoopsPass();
createConvertLinalgToLLVMPass();		createConvertLinalgToLLVMPass();

// LLVM		// LLVM
LLVM::createLegalizeForExportPass();		LLVM::createLegalizeForExportPass();

// LoopOps		// LoopOps
		createParallelLoopCollapsingPass();
createParallelLoopFusionPass();		createParallelLoopFusionPass();
createParallelLoopSpecializationPass();		createParallelLoopSpecializationPass();
createParallelLoopTilingPass();		createParallelLoopTilingPass();

		rriddleUnsubmitted Done Reply Inline Actions Let's keep these sorted. rriddle: Let's keep these sorted.
// QuantOps		// QuantOps
quant::createConvertSimulatedQuantPass();		quant::createConvertSimulatedQuantPass();
quant::createConvertConstPass();		quant::createConvertConstPass();
quantizer::createAddDefaultStatsPass();		quantizer::createAddDefaultStatsPass();
quantizer::createRemoveInstrumentationPass();		quantizer::createRemoveInstrumentationPass();
quantizer::registerInferQuantizedTypesPass();		quantizer::registerInferQuantizedTypesPass();

// SPIR-V		// SPIR-V
Show All 16 Lines

mlir/include/mlir/Transforms/LoopUtils.h

	Show All 22 Lines
	class AffineForOp;			class AffineForOp;
	class FuncOp;			class FuncOp;
	class OpBuilder;			class OpBuilder;
	class Value;			class Value;
	struct MemRefRegion;			struct MemRefRegion;

	namespace loop {			namespace loop {
	class ForOp;			class ForOp;
				class ParallelOp;
	} // end namespace loop			} // end namespace loop

	/// Unrolls this for operation completely if the trip count is known to be			/// Unrolls this for operation completely if the trip count is known to be
	/// constant. Returns failure otherwise.			/// constant. Returns failure otherwise.
	LogicalResult loopUnrollFull(AffineForOp forOp);			LogicalResult loopUnrollFull(AffineForOp forOp);

	/// Unrolls this for operation by the specified unroll factor. Returns failure			/// Unrolls this for operation by the specified unroll factor. Returns failure
	/// if the loop cannot be unrolled either due to restrictions or due to invalid			/// if the loop cannot be unrolled either due to restrictions or due to invalid
	▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines
	TileLoops extractFixedOuterLoops(loop::ForOp rootFOrOp,			TileLoops extractFixedOuterLoops(loop::ForOp rootFOrOp,
	ArrayRef<int64_t> sizes);			ArrayRef<int64_t> sizes);

	/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes			/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
	/// `loops` contains a list of perfectly nested loops with bounds and steps			/// `loops` contains a list of perfectly nested loops with bounds and steps
	/// independent of any loop induction variable involved in the nest.			/// independent of any loop induction variable involved in the nest.
	void coalesceLoops(MutableArrayRef<loop::ForOp> loops);			void coalesceLoops(MutableArrayRef<loop::ForOp> loops);

				/// Take the ParallelLoop and for each set of dimension indices, combine them
				rriddleUnsubmitted Done Reply Inline Actions Can you please add a comment here? rriddle: Can you please add a comment here?
				/// into a single dimension. combinedDimensions must contain each index into
				/// loops exactly once.
				void collapsePLoops(loop::ParallelOp loops,
				ArrayRef<std::vector<unsigned>> combinedDimensions);

	/// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of			/// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of
	/// size given by `numProcessors`. This is achieved by embedding the SSA values			/// size given by `numProcessors`. This is achieved by embedding the SSA values
	/// corresponding to `processorIds` and `numProcessors` into the bounds and step			/// corresponding to `processorIds` and `numProcessors` into the bounds and step
	/// of the `forOp`. No check is performed on the legality of the rewrite, it is			/// of the `forOp`. No check is performed on the legality of the rewrite, it is
	/// the caller's responsibility to ensure legality.			/// the caller's responsibility to ensure legality.
	///			///
	/// Requires that `processorIds` and `numProcessors` have the same size and that			/// Requires that `processorIds` and `numProcessors` have the same size and that
	/// for each idx, `processorIds`[idx] takes, at runtime, all values between 0			/// for each idx, `processorIds`[idx] takes, at runtime, all values between 0
	Show All 33 Lines

mlir/include/mlir/Transforms/Passes.h

	Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	/// to equivalent lower-level constructs (flow of basic blocks and arithmetic			/// to equivalent lower-level constructs (flow of basic blocks and arithmetic
	/// primitives).			/// primitives).
	std::unique_ptr<OpPassBase<FuncOp>> createLowerAffinePass();			std::unique_ptr<OpPassBase<FuncOp>> createLowerAffinePass();

	/// Creates a pass that transforms perfectly nested loops with independent			/// Creates a pass that transforms perfectly nested loops with independent
	/// bounds into a single loop.			/// bounds into a single loop.
	std::unique_ptr<OpPassBase<FuncOp>> createLoopCoalescingPass();			std::unique_ptr<OpPassBase<FuncOp>> createLoopCoalescingPass();

				/// Creates a pass that transforms a single ParallelLoop over N induction
				/// variables into another ParallelLoop over less than N induction variables.
				std::unique_ptr<Pass> createParallelLoopCollapsingPass();

				/// Performs packing (or explicit copying) of accessed memref regions into
				/// buffers in the specified faster memory space through either pointwise copies
				/// or DMA operations.
				std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(
				unsigned slowMemorySpace, unsigned fastMemorySpace,
				unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
				uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());

	/// Creates a pass to perform optimizations relying on memref dataflow such as			/// Creates a pass to perform optimizations relying on memref dataflow such as
	/// store to load forwarding, elimination of dead stores, and dead allocs.			/// store to load forwarding, elimination of dead stores, and dead allocs.
	std::unique_ptr<OpPassBase<FuncOp>> createMemRefDataFlowOptPass();			std::unique_ptr<OpPassBase<FuncOp>> createMemRefDataFlowOptPass();

	/// Creates a pass to strip debug information from a function.			/// Creates a pass to strip debug information from a function.
	std::unique_ptr<Pass> createStripDebugInfoPass();			std::unique_ptr<Pass> createStripDebugInfoPass();

	/// Creates a pass which prints the list of ops and the number of occurences in			/// Creates a pass which prints the list of ops and the number of occurences in
	Show All 13 Lines

mlir/lib/Transforms/CMakeLists.txt

	add_subdirectory(Utils)			add_subdirectory(Utils)

	add_mlir_library(MLIRTransforms			add_mlir_library(MLIRTransforms
	Canonicalizer.cpp			Canonicalizer.cpp
	CSE.cpp			CSE.cpp
	DialectConversion.cpp			DialectConversion.cpp
	Inliner.cpp			Inliner.cpp
	LocationSnapshot.cpp			LocationSnapshot.cpp
	LoopCoalescing.cpp			LoopCoalescing.cpp
	LoopFusion.cpp			LoopFusion.cpp
	LoopInvariantCodeMotion.cpp			LoopInvariantCodeMotion.cpp
	MemRefDataFlowOpt.cpp			MemRefDataFlowOpt.cpp
	OpStats.cpp			OpStats.cpp
				ParallelLoopCollapsing.cpp
	PipelineDataTransfer.cpp			PipelineDataTransfer.cpp
	StripDebugInfo.cpp			StripDebugInfo.cpp
	SymbolDCE.cpp			SymbolDCE.cpp
	ViewOpGraph.cpp			ViewOpGraph.cpp
	ViewRegionGraph.cpp			ViewRegionGraph.cpp

	ADDITIONAL_HEADER_DIRS			ADDITIONAL_HEADER_DIRS
	${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms			${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
	Show All 15 Lines

mlir/lib/Transforms/ParallelLoopCollapsing.cpp

This file was added.

				//===- ParallelLoopCollapsing.cpp - Pass collapsing parallel loop indices -===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Dialect/LoopOps/LoopOps.h"
				#include "mlir/Dialect/StandardOps/IR/Ops.h"
				#include "mlir/Pass/Pass.h"
				#include "mlir/Transforms/LoopUtils.h"
				#include "mlir/Transforms/Passes.h"
				#include "mlir/Transforms/RegionUtils.h"
				#include "llvm/Support/CommandLine.h"
				#include "llvm/Support/Debug.h"

				#define PASS_NAME "parallel-loop-collapsing"
				#define DEBUG_TYPE PASS_NAME

				using namespace mlir;

				namespace {
				/// Pass that collapses the induction variables of every `loop::ParallelOp`
				/// nested under the operation it runs on. Up to three groups of original
				/// loop indices are read from the `collapsed-indices-{0,1,2}` options; each
				/// non-empty group is forwarded to `collapsePLoops` as one set of dimensions
				/// to combine.
				struct ParallelLoopCollapsing : public OperationPass<ParallelLoopCollapsing> {
				  ParallelLoopCollapsing() = default;
				  // Copies nothing from the source pass: the option members below are
				  // rebuilt by their default member initializers, bound to the new instance.
				  ParallelLoopCollapsing(const ParallelLoopCollapsing &) {}

				  void runOnOperation() override {
				    // The common case for GPU dialect will be simplifying the ParallelOp to 3
				    // arguments, so only three groups are supported. The option values do
				    // not change during the walk, so the groups are assembled once up front.
				    llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;
				    if (clCollapsedIndices0.size())
				      combinedLoops.push_back(clCollapsedIndices0);
				    if (clCollapsedIndices1.size())
				      combinedLoops.push_back(clCollapsedIndices1);
				    if (clCollapsedIndices2.size())
				      combinedLoops.push_back(clCollapsedIndices2);

				    // With no group requested there is nothing to collapse into; calling
				    // collapsePLoops would ask for a loop.parallel without any dimension.
				    if (combinedLoops.empty())
				      return;

				    getOperation()->walk(
				        [&](loop::ParallelOp op) { collapsePLoops(op, combinedLoops); });
				  }

				  ListOption<unsigned> clCollapsedIndices0{
				      *this, "collapsed-indices-0",
				      llvm::cl::desc(
				          "Which loop indices to combine into the position 0 loop index"),
				      llvm::cl::MiscFlags::CommaSeparated};

				  ListOption<unsigned> clCollapsedIndices1{
				      *this, "collapsed-indices-1",
				      llvm::cl::desc(
				          "Which loop indices to combine into the position 1 loop index"),
				      llvm::cl::MiscFlags::CommaSeparated};

				  ListOption<unsigned> clCollapsedIndices2{
				      *this, "collapsed-indices-2",
				      llvm::cl::desc(
				          "Which loop indices to combine into the position 2 loop index"),
				      llvm::cl::MiscFlags::CommaSeparated};
				};

				} // namespace

				/// Builds a fresh, default-constructed instance of the parallel loop
				/// collapsing pass and hands ownership to the caller.
				std::unique_ptr<Pass> mlir::createParallelLoopCollapsingPass() {
				  std::unique_ptr<Pass> collapsingPass =
				      std::make_unique<ParallelLoopCollapsing>();
				  return collapsingPass;
				}

				// File-local registration object for the pass: it is constructed with the
				// PASS_NAME argument string ("parallel-loop-collapsing") and the one-line
				// description below.
				static PassRegistration<ParallelLoopCollapsing>
				reg(PASS_NAME, "collapse parallel loops to use less induction variables.");

mlir/lib/Transforms/Utils/LoopUtils.cpp

Show All 30 Lines
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "LoopUtils"		#define DEBUG_TYPE "LoopUtils"

using namespace mlir;		using namespace mlir;
using llvm::SetVector;		using llvm::SetVector;
using llvm::SmallMapVector;		using llvm::SmallMapVector;

		namespace {
		/// Bundles the three values that describe a loop's iteration range so that
		/// they can be passed and returned together without confusing their order.
		/// The field order matters: `normalizeLoop` returns this struct through
		/// brace initialization in the order lower bound, upper bound, step.
		struct LoopParams {
		// Value the loop starts iterating from.
		Value lowerBound;
		// Value bounding the iteration range from above.
		Value upperBound;
		// Increment applied between iterations.
		Value step;
		};
		} // namespace

/// Computes the cleanup loop lower bound of the loop being unrolled with		/// Computes the cleanup loop lower bound of the loop being unrolled with
/// the specified unroll factor; this bound will also be upper bound of the main		/// the specified unroll factor; this bound will also be upper bound of the main
/// part of the unrolled loop. Computes the bound as an AffineMap with its		/// part of the unrolled loop. Computes the bound as an AffineMap with its
/// operands or a null map when the trip count can't be expressed as an affine		/// operands or a null map when the trip count can't be expressed as an affine
/// expression.		/// expression.
void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,		void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
AffineMap *map,		AffineMap *map,
SmallVectorImpl<Value> *operands,		SmallVectorImpl<Value> *operands,
▲ Show 20 Lines • Show All 1,042 Lines • ▼ Show 20 Lines
replaceAllUsesExcept(Value orig, Value replacement,		replaceAllUsesExcept(Value orig, Value replacement,
const SmallPtrSetImpl<Operation *> &exceptions) {		const SmallPtrSetImpl<Operation *> &exceptions) {
for (auto &use : llvm::make_early_inc_range(orig.getUses())) {		for (auto &use : llvm::make_early_inc_range(orig.getUses())) {
if (exceptions.count(use.getOwner()) == 0)		if (exceptions.count(use.getOwner()) == 0)
use.set(replacement);		use.set(replacement);
}		}
}		}

// Transform a loop with a strictly positive step		/// Return the new lower bound, upper bound, and step in that order. Insert any
// for %i = %lb to %ub step %s		/// additional bounds calculations before the given builder and any additional
// into a 0-based loop with step 1		/// conversion back to the original loop induction value inside the given Block.
// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {		static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
		herhutUnsubmitted Done Reply Inline Actions Maybe have a little struct here instead of a tuple? Or use `std::tie` at use sites to improve readability. herhut: Maybe have a little struct here instead of a tuple? Or use `std::tie` at use sites to improve…
// %i = %ii * %s + %lb		OpBuilder &insideLoopBuilder, Location loc,
// Insert the induction variable remapping in the body of `inner`, which is		Value lowerBound, Value upperBound, Value step,
// expected to be either `loop` or another loop perfectly nested under `loop`.		Value inductionVar) {
		rriddleUnsubmitted Done Reply Inline Actions nit: Use /// for top-level comments rriddle: nit: Use /// for top-level comments
// Insert the definition of new bounds immediate before `outer`, which is
// expected to be either `loop` or its parent in the loop nest.
static void normalizeLoop(loop::ForOp loop, loop::ForOp outer,
loop::ForOp inner) {
OpBuilder builder(outer);
Location loc = loop.getLoc();

// Check if the loop is already known to have a constant zero lower bound or		// Check if the loop is already known to have a constant zero lower bound or
		herhutUnsubmitted Done Reply Inline Actions Move these closer to their first use. herhut: Move these closer to their first use.
// a constant one step.		// a constant one step.
bool isZeroBased = false;		bool isZeroBased = false;
if (auto ubCst =		if (auto ubCst =
dyn_cast_or_null<ConstantIndexOp>(loop.lowerBound().getDefiningOp()))		dyn_cast_or_null<ConstantIndexOp>(lowerBound.getDefiningOp()))
isZeroBased = ubCst.getValue() == 0;		isZeroBased = ubCst.getValue() == 0;

bool isStepOne = false;		bool isStepOne = false;
if (auto stepCst =		if (auto stepCst = dyn_cast_or_null<ConstantIndexOp>(step.getDefiningOp()))
dyn_cast_or_null<ConstantIndexOp>(loop.step().getDefiningOp()))
isStepOne = stepCst.getValue() == 1;		isStepOne = stepCst.getValue() == 1;

if (isZeroBased && isStepOne)
return;

// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)		// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
// assuming the step is strictly positive. Update the bounds and the step		// assuming the step is strictly positive. Update the bounds and the step
// of the loop to go from 0 to the number of iterations, if necessary.		// of the loop to go from 0 to the number of iterations, if necessary.
// TODO(zinenko): introduce support for negative steps or emit dynamic asserts		// TODO(zinenko): introduce support for negative steps or emit dynamic asserts
// on step positivity, whatever gets implemented first.		// on step positivity, whatever gets implemented first.
Value diff =		if (isZeroBased && isStepOne)
		rriddleUnsubmitted Done Reply Inline Actions nit: Please drop all trivial braces. rriddle: nit: Please drop all trivial braces.
builder.create<SubIOp>(loc, loop.upperBound(), loop.lowerBound());		return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
Value numIterations = ceilDivPositive(builder, loc, diff, loop.step());		/*step=*/step};
loop.setUpperBound(numIterations);

Value lb = loop.lowerBound();
if (!isZeroBased) {
Value cst0 = builder.create<ConstantIndexOp>(loc, 0);
loop.setLowerBound(cst0);
}

Value step = loop.step();		Value diff = boundsBuilder.create<SubIOp>(loc, upperBound, lowerBound);
if (!isStepOne) {		Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
Value cst1 = builder.create<ConstantIndexOp>(loc, 1);
		herhutUnsubmitted Done Reply Inline Actions Maybe `Value newLowerBound = isZeroBased ? lowerBound : boundsBuilder.create<ConstantIndexOp>(loc, 0)`? herhut: Maybe `Value newLowerBound = isZeroBased ? lowerBound : boundsBuilder.create<ConstantIndexOp>…
loop.setStep(cst1);		Value newLowerBound =
}		isZeroBased ? lowerBound : boundsBuilder.create<ConstantIndexOp>(loc, 0);
		Value newStep =
		isStepOne ? step : boundsBuilder.create<ConstantIndexOp>(loc, 1);

// Insert code computing the value of the original loop induction variable		// Insert code computing the value of the original loop induction variable
		herhutUnsubmitted Done Reply Inline Actions Here, too? herhut: Here, too?
// from the "normalized" one.		// from the "normalized" one.
builder.setInsertionPointToStart(inner.getBody());
Value scaled =		Value scaled =
isStepOne ? loop.getInductionVar()		isStepOne ? inductionVar
: builder.create<MulIOp>(loc, loop.getInductionVar(), step);		: insideLoopBuilder.create<MulIOp>(loc, inductionVar, step);
Value shifted =		Value shifted =
isZeroBased ? scaled : builder.create<AddIOp>(loc, scaled, lb);		isZeroBased ? scaled
		: insideLoopBuilder.create<AddIOp>(loc, scaled, lowerBound);

SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),		SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
shifted.getDefiningOp()};		shifted.getDefiningOp()};
replaceAllUsesExcept(loop.getInductionVar(), shifted, preserve);		replaceAllUsesExcept(inductionVar, shifted, preserve);
		return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
		/*step=*/newStep};
		}

		rriddleUnsubmitted Done Reply Inline Actions nit: Please use /// for top-level comments. rriddle: nit: Please use /// for top-level comments.
		/// Transform a loop with a strictly positive step
		/// for %i = %lb to %ub step %s
		/// into a 0-based loop with step 1
		/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
		/// %i = %ii * %s + %lb
		/// Insert the induction variable remapping in the body of `inner`, which is
		/// expected to be either `loop` or another loop perfectly nested under `loop`.
		/// Insert the definition of new bounds immediate before `outer`, which is
		/// expected to be either `loop` or its parent in the loop nest.
		static void normalizeLoop(loop::ForOp loop, loop::ForOp outer,
		loop::ForOp inner) {
		OpBuilder builder(outer);
		OpBuilder innerBuilder(inner.getBody(), inner.getBody()->begin());
		bondhugulaUnsubmitted Not Done Reply Inline Actions `OpBuilder innerBuilder(inner.getBody())` will be sufficient. bondhugula: `OpBuilder innerBuilder(inner.getBody())` will be sufficient.
		auto loopPieces =
		normalizeLoop(builder, innerBuilder, loop.getLoc(), loop.lowerBound(),
		loop.upperBound(), loop.step(), loop.getInductionVar());

		loop.setLowerBound(loopPieces.lowerBound);
		herhutUnsubmitted Done Reply Inline Actions Mega-nit: The order lower, step, upper is strange... herhut: Mega-nit: The order lower, step, upper is strange...
		loop.setUpperBound(loopPieces.upperBound);
		loop.setStep(loopPieces.step);
}		}

void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {		void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {
if (loops.size() < 2)		if (loops.size() < 2)
return;		return;

loop::ForOp innermost = loops.back();		loop::ForOp innermost = loops.back();
loop::ForOp outermost = loops.front();		loop::ForOp outermost = loops.front();
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {
loop::ForOp second = loops[1];		loop::ForOp second = loops[1];
innermost.getBody()->back().erase();		innermost.getBody()->back().erase();
outermost.getBody()->getOperations().splice(		outermost.getBody()->getOperations().splice(
Block::iterator(second.getOperation()),		Block::iterator(second.getOperation()),
innermost.getBody()->getOperations());		innermost.getBody()->getOperations());
second.erase();		second.erase();
}		}

		/// Collapses the dimensions of the parallel loop `loops` as described by
		/// `combinedDimensions`: entry `i` lists the indices of the original
		/// dimensions that are merged into dimension `i` of the new loop.
		///
		/// Every original dimension is first normalized through `normalizeLoop`. The
		/// new loop iterates each combined dimension from 0 to the product of the
		/// normalized upper bounds of its group with step 1, and the original
		/// (normalized) induction values are recovered inside the new body with
		/// remainder and division operations. The body of `loops` is then moved into
		/// the new loop and `loops` is erased.
		void mlir::collapsePLoops(loop::ParallelOp loops,
		bondhugulaUnsubmitted Not Done Reply Inline Actions But to spell this out? PLoops -> ParallelLoops? bondhugula: But to spell this out? PLoops -> ParallelLoops?
		                          ArrayRef<std::vector<unsigned>> combinedDimensions) {
		  OpBuilder outsideBuilder(loops);
		  Location loc = loops.getLoc();

		  // Normalize ParallelOp's iteration pattern.
		  SmallVector<Value, 3> normalizedLowerBounds;
		  SmallVector<Value, 3> normalizedSteps;
		  SmallVector<Value, 3> normalizedUpperBounds;
		  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
		    // Bounds computations go before `loops`; the conversion back to the
		    // original induction value goes at the start of its body.
		    OpBuilder insideLoopBuilder(loops.getBody(), loops.getBody()->begin());
		rriddleUnsubmitted Done Reply Inline Actions nit: Cache the end iterator of the loop, and prefer pre-increment. rriddle: nit: Cache the end iterator of the loop, and prefer pre-increment.
		    auto resultBounds =
		        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
		                      loops.lowerBound()[i], loops.upperBound()[i],
		                      loops.step()[i], loops.getBody()->getArgument(i));

		    normalizedLowerBounds.push_back(resultBounds.lowerBound);
		    normalizedUpperBounds.push_back(resultBounds.upperBound);
		    normalizedSteps.push_back(resultBounds.step);
		  }

		  // Combine iteration spaces.
		bondhugulaUnsubmitted Not Done Reply Inline Actions Nit: period at the end. bondhugula: Nit: period at the end.
		  SmallVector<Value, 3> lowerBounds;
		  SmallVector<Value, 3> steps;
		  SmallVector<Value, 3> upperBounds;
		  auto cst0 = outsideBuilder.create<ConstantIndexOp>(loc, 0);
		  auto cst1 = outsideBuilder.create<ConstantIndexOp>(loc, 1);
		  for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
		    // The extent of a combined dimension is the product of the normalized
		    // extents of the dimensions in its group (1 for an empty group).
		    Value newUpperBound = outsideBuilder.create<ConstantIndexOp>(loc, 1);
		    for (unsigned idx : combinedDimensions[i]) {
		herhutUnsubmitted Done Reply Inline Actions Why not `newUpperBound = cst1` here? herhut: Why not `newUpperBound = cst1` here?
		tpoppAuthorUnsubmitted Done Reply Inline Actions No real reason. I thought it would be easier for debugging purposes if each string of calculations is fully unconnected from other calculations. tpopp: No real reason. I thought it would be easier for debugging purposes if each string of…
		      newUpperBound = outsideBuilder.create<MulIOp>(loc, newUpperBound,
		                                                    normalizedUpperBounds[idx]);
		    }
		    lowerBounds.push_back(cst0);
		    steps.push_back(cst1);
		    upperBounds.push_back(newUpperBound);
		  }

		  // Create new ParallelLoop with conversions to the original induction values.
		  // The loop below uses divisions to get the relevant range of values in the
		  // new induction value that represent each range of the original induction
		  // value. The remainders then determine based on that range, which iteration
		  // of the original induction value this represents. This is a normalized value
		  // that is un-normalized already by the previous logic.
		rriddleUnsubmitted Done Reply Inline Actions Same here and below. rriddle: Same here and below.
		  auto newPloop = outsideBuilder.create<loop::ParallelOp>(loc, lowerBounds,
		                                                          upperBounds, steps);
		herhutUnsubmitted Done Reply Inline Actions A comment what this computes would help readability. herhut: A comment what this computes would help readability.
		  OpBuilder insideBuilder(newPloop.getBody(), newPloop.getBody()->begin());
		  for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
		    Value previous = newPloop.getBody()->getArgument(i);
		    unsigned numberCombinedDimensions = combinedDimensions[i].size();
		herhutUnsubmitted Done Reply Inline Actions Should this be the normalized upper bound? herhut: Should this be the normalized upper bound?
		tpoppAuthorUnsubmitted Done Reply Inline Actions Yes tpopp: Yes
		    // An empty group yields a dimension of extent 1 that maps to no original
		    // induction value; skipping it also keeps the unsigned subtraction below
		    // from wrapping around.
		    if (numberCombinedDimensions == 0)
		      continue;

		    // Iterate over all except the last induction value.
		    for (unsigned j = 0, last = numberCombinedDimensions - 1; j < last; ++j) {
		herhutUnsubmitted Done Reply Inline Actions It would read easier for me if updating previous was also done here except for the last case. Would that make sense? herhut: It would read easier for me if updating previous was also done here except for the last case.
		tpoppAuthorUnsubmitted Done Reply Inline Actions I think this trades one mess for a different one because then it's just a different bounds check and not all indexing is happening at ivar_idx anymore. tpopp: I think this trades one mess for a different one because then it's just a different bounds…
		tpoppAuthorUnsubmitted Done Reply Inline Actions I tried to restructure it to be more readable. tpopp: I tried to restructure it to be more readable.
		      unsigned idx = combinedDimensions[i][j];

		      // Determine the current induction value's current loop iteration
		herhutUnsubmitted Done Reply Inline Actions Normalized here, too? herhut: Normalized here, too?
		tpoppAuthorUnsubmitted Done Reply Inline Actions Yes tpopp: Yes
		      Value iv = insideBuilder.create<SignedRemIOp>(loc, previous,
		                                                    normalizedUpperBounds[idx]);
		      replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
		                                 loops.region());

		      // Remove the effect of the current induction value to prepare for the
		      // next value. The divisor must be the extent of the dimension that was
		      // just extracted with the remainder above, i.e. the same `idx`; the
		      // original dimension `idx + 1` need not belong to this group and may
		      // not exist at all.
		      previous = insideBuilder.create<SignedDivIOp>(
		          loc, previous, normalizedUpperBounds[idx]);
		    }

		    // The final induction value is just the remaining value.
		    unsigned idx = combinedDimensions[i][numberCombinedDimensions - 1];
		    replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), previous,
		                               loops.region());
		  }

		  // Replace the old loop with the new loop: drop the old terminator, move the
		  // remaining operations in front of the new loop's terminator, and erase the
		  // now-empty original operation.
		  loops.getBody()->back().erase();
		  newPloop.getBody()->getOperations().splice(
		      Block::iterator(newPloop.getBody()->back()),
		      loops.getBody()->getOperations());
		  loops.erase();
		}

void mlir::mapLoopToProcessorIds(loop::ForOp forOp, ArrayRef<Value> processorId,		void mlir::mapLoopToProcessorIds(loop::ForOp forOp, ArrayRef<Value> processorId,
ArrayRef<Value> numProcessors) {		ArrayRef<Value> numProcessors) {
assert(processorId.size() == numProcessors.size());		assert(processorId.size() == numProcessors.size());
if (processorId.empty())		if (processorId.empty())
return;		return;

OpBuilder b(forOp);		OpBuilder b(forOp);
Location loc(forOp.getLoc());		Location loc(forOp.getLoc());
▲ Show 20 Lines • Show All 747 Lines • Show Last 20 Lines

mlir/test/Transforms/parallel-loop-collapsing.mlir

This file was added.

				// RUN: mlir-opt %s -pass-pipeline='func(parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s

				// CHECK-LABEL: func @parallel_many_dims() {
				func @parallel_many_dims() {
				// CHECK: [[VAL_0:%.*]] = constant 6 : index
				// CHECK: [[VAL_1:%.*]] = constant 7 : index
				// CHECK: [[VAL_2:%.*]] = constant 9 : index
				// CHECK: [[VAL_3:%.*]] = constant 10 : index
				// CHECK: [[VAL_4:%.*]] = constant 12 : index
				// CHECK: [[VAL_5:%.*]] = constant 13 : index
				// CHECK: [[VAL_6:%.*]] = constant 3 : index
				// CHECK: [[VAL_7:%.*]] = constant 0 : index
				// CHECK: [[VAL_8:%.*]] = constant 1 : index
				// CHECK: [[VAL_9:%.*]] = constant 2 : index
				bondhugulaUnsubmitted Not Done Reply Inline Actions Can you drop the extra indent? Also VAL_0 -> C6, VAL_1 -> C7, ...? bondhugula: Can you drop the extra indent? Also VAL_0 -> C6, VAL_1 -> C7, ...?
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c3 = constant 3 : index
				%c4 = constant 4 : index
				%c5 = constant 5 : index
				%c6 = constant 6 : index
				%c7 = constant 7 : index
				%c8 = constant 8 : index
				%c9 = constant 9 : index
				%c10 = constant 10 : index
				%c11 = constant 11 : index
				%c12 = constant 12 : index
				%c13 = constant 13 : index
				%c14 = constant 14 : index

				// CHECK: loop.parallel ([[VAL_10:%.*]], [[VAL_11:%.*]], [[VAL_12:%.*]]) = ([[VAL_7]], [[VAL_7]], [[VAL_7]]) to ([[VAL_9]], [[VAL_8]], [[VAL_8]]) step ([[VAL_8]], [[VAL_8]], [[VAL_8]]) {
				bondhugulaUnsubmitted Not Done Reply Inline Actions Can you use more descriptive names? VAL_10 -> IV0, VAL_11 -> IV1, ... bondhugula: Can you use more descriptive names? VAL_10 -> IV0, VAL_11 -> IV1, ...
				loop.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
				step (%c1, %c4, %c7, %c10, %c13) {
				// CHECK: [[VAL_13:%.*]] = remi_signed [[VAL_10]], [[VAL_9]] : index
				bondhugulaUnsubmitted Not Done Reply Inline Actions Drop the additional indent between the CHECK and the string? bondhugula: Drop the additional indent between the CHECK and the string?
				// CHECK: [[VAL_14:%.*]] = divi_signed [[VAL_10]], [[VAL_8]] : index
				// CHECK: [[VAL_15:%.*]] = divi_signed [[VAL_11]], [[VAL_8]] : index
				// CHECK: [[VAL_16:%.*]] = muli [[VAL_15]], [[VAL_5]] : index
				// CHECK: [[VAL_17:%.*]] = addi [[VAL_16]], [[VAL_4]] : index
				// CHECK: [[VAL_18:%.*]] = muli [[VAL_14]], [[VAL_3]] : index
				// CHECK: [[VAL_19:%.*]] = addi [[VAL_18]], [[VAL_2]] : index
				// CHECK: [[VAL_20:%.*]] = muli [[VAL_12]], [[VAL_1]] : index
				// CHECK: [[VAL_21:%.*]] = addi [[VAL_20]], [[VAL_0]] : index
				// CHECK: [[VAL_22:%.*]] = "magic.op"([[VAL_13]], [[VAL_6]], [[VAL_21]], [[VAL_19]], [[VAL_17]]) : (index, index, index, index, index) -> index
				%result = "magic.op"(%i0, %i1, %i2, %i3, %i4): (index, index, index, index, index) -> index
				}
				return
				}
				// CHECK: loop.yield
				// CHECK: }
				bondhugulaUnsubmitted Not Done Reply Inline Actions CHECK-NEXT bondhugula: CHECK-NEXT
				// CHECK: return
				// CHECK: }
				bondhugulaUnsubmitted Not Done Reply Inline Actions CHECK-NEXT bondhugula: CHECK-NEXT

mlir/test/Transforms/single-parallel-loop-collapsing.mlir

This file was added.

				// RUN: mlir-opt %s -pass-pipeline='func(parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize)' | FileCheck %s

				// CHECK-LABEL: func @collapse_to_single() {
				// Input: a two-dimensional loop.parallel with non-zero lower bounds and
				// non-unit steps. The CHECK lines expect a single-dimension loop.parallel
				// running from 0 to 18 with step 1, followed by the remainder/division and
				// multiply/add operations that feed "magic.op".
				// Note: the SSA names %c0..%c5 below are plain identifiers; they do not
				// match the constant values they hold.
				func @collapse_to_single() {
				// CHECK: [[VAL_0:%.*]] = constant 7 : index
				// CHECK: [[VAL_1:%.*]] = constant 4 : index
				// CHECK: [[VAL_2:%.*]] = constant 18 : index
				// CHECK: [[VAL_3:%.*]] = constant 3 : index
				// CHECK: [[VAL_4:%.*]] = constant 6 : index
				// CHECK: [[VAL_5:%.*]] = constant 0 : index
				// CHECK: [[VAL_6:%.*]] = constant 1 : index
				%c0 = constant 3 : index
				%c1 = constant 7 : index
				%c2 = constant 11 : index
				%c3 = constant 29 : index
				%c4 = constant 3 : index
				%c5 = constant 4 : index
				// CHECK: loop.parallel ([[VAL_7:%.*]]) = ([[VAL_5]]) to ([[VAL_2]]) step ([[VAL_6]]) {
				loop.parallel (%i0, %i1) = (%c0, %c1) to (%c2, %c3) step (%c4, %c5) {
				// CHECK: [[VAL_8:%.*]] = remi_signed [[VAL_7]], [[VAL_3]] : index
				// CHECK: [[VAL_9:%.*]] = divi_signed [[VAL_7]], [[VAL_4]] : index
				// CHECK: [[VAL_10:%.*]] = muli [[VAL_9]], [[VAL_1]] : index
				// CHECK: [[VAL_11:%.*]] = addi [[VAL_10]], [[VAL_0]] : index
				// CHECK: [[VAL_12:%.*]] = muli [[VAL_8]], [[VAL_3]] : index
				// CHECK: [[VAL_13:%.*]] = addi [[VAL_12]], [[VAL_3]] : index
				// CHECK: [[VAL_14:%.*]] = "magic.op"([[VAL_13]], [[VAL_11]]) : (index, index) -> index
				bondhugulaUnsubmitted Not Done Reply Inline Actions VAL_14 isn't used; no need to capture it. bondhugula: VAL_14 isn't used; no need to capture it.
				%result = "magic.op"(%i0, %i1): (index, index) -> index
				}
				return
				}
				// CHECK: loop.yield
				// CHECK: }
				// CHECK: return
				bondhugulaUnsubmitted Not Done Reply Inline Actions CHECK-NEXT bondhugula: CHECK-NEXT
				// CHECK: }
				bondhugulaUnsubmitted Not Done Reply Inline Actions Not needed. bondhugula: Not needed.

				bondhugulaUnsubmitted Not Done Reply Inline Actions Drop trailing blank lines. bondhugula: Drop trailing blank lines.

This is an archive of the discontinued LLVM Phabricator instance.

[MLIR] Add parallel loop coalescing.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 252767

mlir/include/mlir/InitAllPasses.h

mlir/include/mlir/Transforms/LoopUtils.h

mlir/include/mlir/Transforms/Passes.h

mlir/lib/Transforms/CMakeLists.txt

mlir/lib/Transforms/ParallelLoopCollapsing.cpp

mlir/lib/Transforms/Utils/LoopUtils.cpp

mlir/test/Transforms/parallel-loop-collapsing.mlir

mlir/test/Transforms/single-parallel-loop-collapsing.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[MLIR] Add parallel loop coalescing.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 252767

mlir/include/mlir/InitAllPasses.h

mlir/include/mlir/Transforms/LoopUtils.h

mlir/include/mlir/Transforms/Passes.h

mlir/lib/Transforms/CMakeLists.txt

mlir/lib/Transforms/ParallelLoopCollapsing.cpp

mlir/lib/Transforms/Utils/LoopUtils.cpp

mlir/test/Transforms/parallel-loop-collapsing.mlir

mlir/test/Transforms/single-parallel-loop-collapsing.mlir

[MLIR] Add parallel loop coalescing.
ClosedPublic