Diff 251096

mlir/include/mlir/InitAllPasses.h

Show First 20 Lines • Show All 105 Lines • ▼ Show 20 Lines	#endif
createConvertLinalgToParallelLoopsPass();		createConvertLinalgToParallelLoopsPass();
createConvertLinalgToAffineLoopsPass();		createConvertLinalgToAffineLoopsPass();
createConvertLinalgToLLVMPass();		createConvertLinalgToLLVMPass();

// LoopOps		// LoopOps
createParallelLoopFusionPass();		createParallelLoopFusionPass();
createParallelLoopSpecializationPass();		createParallelLoopSpecializationPass();
createParallelLoopTilingPass();		createParallelLoopTilingPass();
		createParallelLoopCoalescingPass();
		rriddleUnsubmitted Done Reply Inline Actions Let's keep these sorted. rriddle: Let's keep these sorted.

// QuantOps		// QuantOps
quant::createConvertSimulatedQuantPass();		quant::createConvertSimulatedQuantPass();
quant::createConvertConstPass();		quant::createConvertConstPass();
quantizer::createAddDefaultStatsPass();		quantizer::createAddDefaultStatsPass();
quantizer::createRemoveInstrumentationPass();		quantizer::createRemoveInstrumentationPass();
quantizer::registerInferQuantizedTypesPass();		quantizer::registerInferQuantizedTypesPass();

Show All 17 Lines

mlir/include/mlir/Transforms/LoopUtils.h

	Show All 22 Lines
	class AffineForOp;			class AffineForOp;
	class FuncOp;			class FuncOp;
	class OpBuilder;			class OpBuilder;
	class Value;			class Value;
	struct MemRefRegion;			struct MemRefRegion;

	namespace loop {			namespace loop {
	class ForOp;			class ForOp;
				class ParallelOp;
	} // end namespace loop			} // end namespace loop

	/// Unrolls this for operation completely if the trip count is known to be			/// Unrolls this for operation completely if the trip count is known to be
	/// constant. Returns failure otherwise.			/// constant. Returns failure otherwise.
	LogicalResult loopUnrollFull(AffineForOp forOp);			LogicalResult loopUnrollFull(AffineForOp forOp);

	/// Unrolls this for operation by the specified unroll factor. Returns failure			/// Unrolls this for operation by the specified unroll factor. Returns failure
	/// if the loop cannot be unrolled either due to restrictions or due to invalid			/// if the loop cannot be unrolled either due to restrictions or due to invalid
	▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines
	TileLoops extractFixedOuterLoops(loop::ForOp rootFOrOp,			TileLoops extractFixedOuterLoops(loop::ForOp rootFOrOp,
	ArrayRef<int64_t> sizes);			ArrayRef<int64_t> sizes);

	/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes			/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
	/// `loops` contains a list of perfectly nested loops with bounds and steps			/// `loops` contains a list of perfectly nested loops with bounds and steps
	/// independent of any loop induction variable involved in the nest.			/// independent of any loop induction variable involved in the nest.
	void coalesceLoops(MutableArrayRef<loop::ForOp> loops);			void coalesceLoops(MutableArrayRef<loop::ForOp> loops);

				/// Coalesces the induction variables of the parallel loop `loops`. A new
				/// loop.parallel operation is created with one induction variable per entry
				/// of `combinedDimensions`; entry `i` lists the indices of the original
				/// induction variables that are folded into dimension `i` of the new loop.
				/// The body of `loops` is moved into the new operation and `loops` is erased.
				void coalescePLoops(loop::ParallelOp loops,
				rriddleUnsubmitted Done Reply Inline Actions Can you please add a comment here? rriddle: Can you please add a comment here?
				std::vector<std::vector<unsigned>> combinedDimensions);

	/// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of			/// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of
	/// size given by `numProcessors`. This is achieved by embedding the SSA values			/// size given by `numProcessors`. This is achieved by embedding the SSA values
	/// corresponding to `processorIds` and `numProcessors` into the bounds and step			/// corresponding to `processorIds` and `numProcessors` into the bounds and step
	/// of the `forOp`. No check is performed on the legality of the rewrite, it is			/// of the `forOp`. No check is performed on the legality of the rewrite, it is
	/// the caller's responsibility to ensure legality.			/// the caller's responsibility to ensure legality.
	///			///
	/// Requires that `processorIds` and `numProcessors` have the same size and that			/// Requires that `processorIds` and `numProcessors` have the same size and that
	/// for each idx, `processorIds`[idx] takes, at runtime, all values between 0			/// for each idx, `processorIds`[idx] takes, at runtime, all values between 0
	Show All 33 Lines

mlir/include/mlir/Transforms/Passes.h

	Show First 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	/// Creates a pass to perform tiling on loop nests.			/// Creates a pass to perform tiling on loop nests.
	std::unique_ptr<OpPassBase<FuncOp>>			std::unique_ptr<OpPassBase<FuncOp>>
	createLoopTilingPass(uint64_t cacheSizeBytes);			createLoopTilingPass(uint64_t cacheSizeBytes);

	/// Creates a pass that transforms perfectly nested loops with independent			/// Creates a pass that transforms perfectly nested loops with independent
	/// bounds into a single loop.			/// bounds into a single loop.
	std::unique_ptr<OpPassBase<FuncOp>> createLoopCoalescingPass();			std::unique_ptr<OpPassBase<FuncOp>> createLoopCoalescingPass();

				/// Creates a pass that coalesces the induction variables of each
				/// loop.parallel operation in a function. In this revision the pass always
				/// produces three induction variables, grouped by the
				/// `coalesced-indices-{0,1,2}` command-line options.
				std::unique_ptr<OpPassBase<FuncOp>> createParallelLoopCoalescingPass();

	/// Performs packing (or explicit copying) of accessed memref regions into			/// Performs packing (or explicit copying) of accessed memref regions into
	/// buffers in the specified faster memory space through either pointwise copies			/// buffers in the specified faster memory space through either pointwise copies
	/// or DMA operations.			/// or DMA operations.
	std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(			std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(
	unsigned slowMemorySpace, unsigned fastMemorySpace,			unsigned slowMemorySpace, unsigned fastMemorySpace,
	unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,			unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
	uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());			uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());

	Show All 21 Lines

mlir/lib/Transforms/CMakeLists.txt

Show All 10 Lines	add_mlir_library(MLIRTransforms
LoopCoalescing.cpp		LoopCoalescing.cpp
LoopFusion.cpp		LoopFusion.cpp
LoopInvariantCodeMotion.cpp		LoopInvariantCodeMotion.cpp
LoopTiling.cpp		LoopTiling.cpp
LoopUnrollAndJam.cpp		LoopUnrollAndJam.cpp
LoopUnroll.cpp		LoopUnroll.cpp
MemRefDataFlowOpt.cpp		MemRefDataFlowOpt.cpp
OpStats.cpp		OpStats.cpp
		ParallelLoopCoalescing.cpp
PipelineDataTransfer.cpp		PipelineDataTransfer.cpp
SimplifyAffineStructures.cpp		SimplifyAffineStructures.cpp
StripDebugInfo.cpp		StripDebugInfo.cpp
SymbolDCE.cpp		SymbolDCE.cpp
Vectorize.cpp		Vectorize.cpp
ViewOpGraph.cpp		ViewOpGraph.cpp
ViewRegionGraph.cpp		ViewRegionGraph.cpp

Show All 17 Lines

mlir/lib/Transforms/ParallelLoopCoalescing.cpp

This file was added.

				//===- ParallelLoopCoalescing.cpp - Pass coalescing parallel loop indices -===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "mlir/Dialect/LoopOps/LoopOps.h"
				#include "mlir/Dialect/StandardOps/IR/Ops.h"
				#include "mlir/Pass/Pass.h"
				#include "mlir/Transforms/LoopUtils.h"
				#include "mlir/Transforms/Passes.h"
				#include "mlir/Transforms/RegionUtils.h"
				#include "llvm/Support/CommandLine.h"
				#include "llvm/Support/Debug.h"

				#define PASS_NAME "parallel-loop-coalescing"
				#define DEBUG_TYPE PASS_NAME

				using namespace mlir;

				// Option category grouping this pass's command-line flags; its name is
				// "parallel-loop-coalescing options" (DEBUG_TYPE expands to PASS_NAME).
				static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");

				rriddleUnsubmitted Done Reply Inline Actions Can you use pass options instead? https://mlir.llvm.org/docs/WritingAPass/#instance-specific-pass-options rriddle: Can you use pass options instead? https://mlir.llvm.org/docs/WritingAPass/#instance-specific…
				// Comma-separated indices of the original induction variables that are
				// combined into induction variable 0 of the coalesced loop.
				static llvm::cl::list<unsigned> clCoalescedIndices0(
				    "coalesced-indices-0",
				    llvm::cl::desc(
				        "Which loop indices to combine into the position 0 loop index"),
				    llvm::cl::MiscFlags::CommaSeparated, llvm::cl::cat(clOptionsCategory));

				// Comma-separated indices of the original induction variables that are
				// combined into induction variable 1 of the coalesced loop.
				static llvm::cl::list<unsigned> clCoalescedIndices1(
				    "coalesced-indices-1",
				    llvm::cl::desc(
				        "Which loop indices to combine into the position 1 loop index"),
				    llvm::cl::MiscFlags::CommaSeparated, llvm::cl::cat(clOptionsCategory));

				// Comma-separated indices of the original induction variables that are
				// combined into induction variable 2 of the coalesced loop.
				static llvm::cl::list<unsigned> clCoalescedIndices2(
				    "coalesced-indices-2",
				    llvm::cl::desc(
				        "Which loop indices to combine into the position 2 loop index"),
				    llvm::cl::MiscFlags::CommaSeparated, llvm::cl::cat(clOptionsCategory));

				namespace {
				/// Function pass that coalesces the induction variables of every
				/// loop::ParallelOp found in the function. The grouping is taken from the
				/// `coalesced-indices-{0,1,2}` command-line lists declared in this file.
				class ParallelLoopCoalescingPass
				: public FunctionPass<ParallelLoopCoalescingPass> {
				public:
				herhutUnsubmitted Done Reply Inline Actions Can you make this an `OperationPass` instead? herhut: Can you make this an `OperationPass` instead?
				/// Walks the current function and hands each loop::ParallelOp to
				/// coalescePLoops together with the three index groups.
				void runOnFunction() override {
				FuncOp func = getFunction();

				// The common case for GPU dialect will be simplifying the ParallelOp to 3
				// arguments, so we do that here to simplify things.
				func.walk([&](loop::ParallelOp op) {
				// Entry k lists the original induction variables that are merged into
				// dimension k of the new loop; an option that was not given yields an
				// empty group.
				std::vector<std::vector<unsigned>> combinedLoops(3);
				combinedLoops[0] = clCoalescedIndices0;
				bondhugulaUnsubmitted Done Reply Inline Actions List initialize? bondhugula: List initialize?
				herhutUnsubmitted Done Reply Inline Actions Use `llvm::SmallVector` instead? herhut: Use `llvm::SmallVector` instead?
				combinedLoops[1] = clCoalescedIndices1;
				combinedLoops[2] = clCoalescedIndices2;
				coalescePLoops(op, combinedLoops);
				});
				}
				};

				} // namespace

				/// Factory for the parallel loop coalescing pass: allocates a fresh
				/// ParallelLoopCoalescingPass and hands ownership to the caller.
				std::unique_ptr<OpPassBase<FuncOp>> mlir::createParallelLoopCoalescingPass() {
				  std::unique_ptr<OpPassBase<FuncOp>> pass =
				      std::make_unique<ParallelLoopCoalescingPass>();
				  return pass;
				}

				// Registers the pass under the name given by PASS_NAME
				// ("parallel-loop-coalescing") together with its description string.
				static PassRegistration<ParallelLoopCoalescingPass>
				reg(PASS_NAME, "coalesce parallel loops to use less induction variables.");

mlir/lib/Transforms/Utils/LoopUtils.cpp

Show First 20 Lines • Show All 967 Lines • ▼ Show 20 Lines
replaceAllUsesExcept(Value orig, Value replacement,		replaceAllUsesExcept(Value orig, Value replacement,
const SmallPtrSetImpl<Operation *> &exceptions) {		const SmallPtrSetImpl<Operation *> &exceptions) {
for (auto &use : llvm::make_early_inc_range(orig.getUses())) {		for (auto &use : llvm::make_early_inc_range(orig.getUses())) {
if (exceptions.count(use.getOwner()) == 0)		if (exceptions.count(use.getOwner()) == 0)
use.set(replacement);		use.set(replacement);
}		}
}		}

// Transform a loop with a strictly positive step		// Return the new lower bound, upper bound, and step in that order. Insert any
// for %i = %lb to %ub step %s		// additional bounds calculations before the given builder and any additional
// into a 0-based loop with step 1		// conversion back to the original loop induction value inside the given Block.
// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {		static std::tuple<Value, Value, Value>
		herhutUnsubmitted Done Reply Inline Actions Maybe have a little struct here instead of a tuple? Or use `std::tie` at use sites to improve readability. herhut: Maybe have a little struct here instead of a tuple? Or use `std::tie` at use sites to improve…
// %i = %ii * %s + %lb		normalizeLoop(OpBuilder &boundsBuilder, OpBuilder &insideLoopBuilder,
// Insert the induction variable remapping in the body of `inner`, which is		Location loc, Value lowerBound, Value upperBound, Value step,
// expected to be either `loop` or another loop perfectly nested under `loop`.		Value inductionVar) {
// Insert the definition of new bounds immediate before `outer`, which is		Value newLowerBound, newUpperBound, newStep;
		rriddleUnsubmitted Done Reply Inline Actions nit: Use /// for top-level comments rriddle: nit: Use /// for top-level comments
		herhutUnsubmitted Done Reply Inline Actions Move these closer to their first use. herhut: Move these closer to their first use.
// expected to be either `loop` or its parent in the loop nest.
static void normalizeLoop(loop::ForOp loop, loop::ForOp outer,
loop::ForOp inner) {
OpBuilder builder(outer);
Location loc = loop.getLoc();

// Check if the loop is already known to have a constant zero lower bound or		// Check if the loop is already known to have a constant zero lower bound or
// a constant one step.		// a constant one step.
bool isZeroBased = false;		bool isZeroBased = false;
if (auto ubCst =		if (auto ubCst =
dyn_cast_or_null<ConstantIndexOp>(loop.lowerBound().getDefiningOp()))		dyn_cast_or_null<ConstantIndexOp>(lowerBound.getDefiningOp()))
isZeroBased = ubCst.getValue() == 0;		isZeroBased = ubCst.getValue() == 0;

bool isStepOne = false;		bool isStepOne = false;
if (auto stepCst =		if (auto stepCst = dyn_cast_or_null<ConstantIndexOp>(step.getDefiningOp()))
dyn_cast_or_null<ConstantIndexOp>(loop.step().getDefiningOp()))
isStepOne = stepCst.getValue() == 1;		isStepOne = stepCst.getValue() == 1;

if (isZeroBased && isStepOne)
return;

// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)		// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
// assuming the step is strictly positive. Update the bounds and the step		// assuming the step is strictly positive. Update the bounds and the step
// of the loop to go from 0 to the number of iterations, if necessary.		// of the loop to go from 0 to the number of iterations, if necessary.
// TODO(zinenko): introduce support for negative steps or emit dynamic asserts		// TODO(zinenko): introduce support for negative steps or emit dynamic asserts
// on step positivity, whatever gets implemented first.		// on step positivity, whatever gets implemented first.
Value diff =		if (isZeroBased && isStepOne) {
builder.create<SubIOp>(loc, loop.upperBound(), loop.lowerBound());		return {lowerBound, upperBound, step};
		rriddleUnsubmitted Done Reply Inline Actions nit: Please drop all trivial braces. rriddle: nit: Please drop all trivial braces.
Value numIterations = ceilDivPositive(builder, loc, diff, loop.step());		}
loop.setUpperBound(numIterations);
		Value diff = boundsBuilder.create<SubIOp>(loc, upperBound, lowerBound);
Value lb = loop.lowerBound();		newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
if (!isZeroBased) {
Value cst0 = builder.create<ConstantIndexOp>(loc, 0);		if (isZeroBased) {
		herhutUnsubmitted Done Reply Inline Actions Maybe `Value newLowerBound = isZeroBased ? lowerBound : boundsBuilder.create<ConstantIndexOp>(loc, 0)`? herhut: Maybe `Value newLowerBound = isZeroBased ? lowerBound : boundsBuilder.create<ConstantIndexOp>…
loop.setLowerBound(cst0);		newLowerBound = lowerBound;
		} else {
		newLowerBound = boundsBuilder.create<ConstantIndexOp>(loc, 0);
}		}

Value step = loop.step();		if (isStepOne) {
		herhutUnsubmitted Done Reply Inline Actions Here, too? herhut: Here, too?
if (!isStepOne) {		newStep = step;
Value cst1 = builder.create<ConstantIndexOp>(loc, 1);		} else {
loop.setStep(cst1);		newStep = boundsBuilder.create<ConstantIndexOp>(loc, 1);
}		}

// Insert code computing the value of the original loop induction variable		// Insert code computing the value of the original loop induction variable
// from the "normalized" one.		// from the "normalized" one.
builder.setInsertionPointToStart(inner.getBody());
Value scaled =		Value scaled =
isStepOne ? loop.getInductionVar()		isStepOne ? inductionVar
: builder.create<MulIOp>(loc, loop.getInductionVar(), step);		: insideLoopBuilder.create<MulIOp>(loc, inductionVar, step);
Value shifted =		Value shifted =
isZeroBased ? scaled : builder.create<AddIOp>(loc, scaled, lb);		isZeroBased ? scaled
		: insideLoopBuilder.create<AddIOp>(loc, scaled, lowerBound);

SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),		SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
shifted.getDefiningOp()};		shifted.getDefiningOp()};
replaceAllUsesExcept(loop.getInductionVar(), shifted, preserve);		replaceAllUsesExcept(inductionVar, shifted, preserve);
		return {newLowerBound, newUpperBound, newStep};
		}

		// Transform a loop with a strictly positive step
		rriddleUnsubmitted Done Reply Inline Actions nit: Please use /// for top-level comments. rriddle: nit: Please use /// for top-level comments.
		// for %i = %lb to %ub step %s
		// into a 0-based loop with step 1
		// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
		// %i = %ii * %s + %lb
		// Insert the induction variable remapping in the body of `inner`, which is
		// expected to be either `loop` or another loop perfectly nested under `loop`.
		// Insert the definition of new bounds immediate before `outer`, which is
		// expected to be either `loop` or its parent in the loop nest.
		static void normalizeLoop(loop::ForOp loop, loop::ForOp outer,
		loop::ForOp inner) {
		OpBuilder builder(outer);
		OpBuilder innerBuilder(inner.getBody(), inner.getBody()->begin());
		auto loopPieces =
		bondhugulaUnsubmitted Not Done Reply Inline Actions `OpBuilder innerBuilder(inner.getBody())` will be sufficient. bondhugula: `OpBuilder innerBuilder(inner.getBody())` will be sufficient.
		normalizeLoop(builder, innerBuilder, loop.getLoc(), loop.lowerBound(),
		loop.upperBound(), loop.step(), loop.getInductionVar());

		loop.setLowerBound(std::get<0>(loopPieces));
		loop.setStep(std::get<2>(loopPieces));
		herhutUnsubmitted Done Reply Inline Actions Mega-nit: The order lower, step, upper is strange... herhut: Mega-nit: The order lower, step, upper is strange...
		loop.setUpperBound(std::get<1>(loopPieces));
}		}

void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {		void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {
if (loops.size() < 2)		if (loops.size() < 2)
return;		return;

loop::ForOp innermost = loops.back();		loop::ForOp innermost = loops.back();
loop::ForOp outermost = loops.front();		loop::ForOp outermost = loops.front();
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {
loop::ForOp second = loops[1];		loop::ForOp second = loops[1];
innermost.getBody()->back().erase();		innermost.getBody()->back().erase();
outermost.getBody()->getOperations().splice(		outermost.getBody()->getOperations().splice(
Block::iterator(second.getOperation()),		Block::iterator(second.getOperation()),
innermost.getBody()->getOperations());		innermost.getBody()->getOperations());
second.erase();		second.erase();
}		}

		/// Coalesces the induction variables of the parallel loop `loops`.
		///
		/// Every dimension of `loops` is first normalized with normalizeLoop. A new
		/// loop.parallel operation is then created with one induction variable per
		/// entry of `combinedDimensions`; it runs from 0 with step 1 up to the product
		/// of the normalized upper bounds of the dimensions listed in that entry.
		/// Inside the new loop each original induction variable is recovered from the
		/// combined one by a remainder/division chain in which the first listed
		/// dimension varies fastest. Finally the body of `loops` is moved into the new
		/// operation and `loops` is erased.
		///
		/// `combinedDimensions[i]` holds the indices of the original induction
		/// variables that are folded into dimension `i` of the new loop; every index
		/// must be smaller than the number of dimensions of `loops`.
		void mlir::coalescePLoops(
		bondhugulaUnsubmitted Not Done Reply Inline Actions But to spell this out? PLoops -> ParallelLoops? bondhugula: But to spell this out? PLoops -> ParallelLoops?
		    loop::ParallelOp loops,
		    std::vector<std::vector<unsigned>> combinedDimensions) {
		  OpBuilder outsideBuilder(loops);

		  // Normalize ParallelOp's iteration pattern.
		  SmallVector<Value, 3> normalizedLowerBounds;
		  SmallVector<Value, 3> normalizedSteps;
		  SmallVector<Value, 3> normalizedUpperBounds;
		  for (unsigned i = 0; i < loops.getNumLoops(); i++) {
		    OpBuilder insideLoopBuilder(loops.getBody(), loops.getBody()->begin());
		rriddleUnsubmitted Done Reply Inline Actions nit: Cache the end iterator of the loop, and prefer pre-increment. rriddle: nit: Cache the end iterator of the loop, and prefer pre-increment.
		    auto resultBounds =
		        normalizeLoop(outsideBuilder, insideLoopBuilder, loops.getLoc(),
		                      loops.lowerBound()[i], loops.upperBound()[i],
		                      loops.step()[i], loops.getBody()->getArgument(i));

		    normalizedLowerBounds.push_back(std::get<0>(resultBounds));
		    normalizedUpperBounds.push_back(std::get<1>(resultBounds));
		    normalizedSteps.push_back(std::get<2>(resultBounds));
		  }

		  // Combine iteration spaces
		bondhugulaUnsubmitted Not Done Reply Inline Actions Nit: period at the end. bondhugula: Nit: period at the end.
		  SmallVector<Value, 3> lowerBounds;
		  SmallVector<Value, 3> steps;
		  SmallVector<Value, 3> upperBounds;
		  auto cst0 = outsideBuilder.create<ConstantIndexOp>(loops.getLoc(), 0);
		  auto cst1 = outsideBuilder.create<ConstantIndexOp>(loops.getLoc(), 1);
		  for (unsigned i = 0; i < combinedDimensions.size(); i++) {
		    // The trip count of a combined dimension is the product of the normalized
		    // trip counts of its members; an empty group yields a single iteration.
		    Value newUpperBound =
		        outsideBuilder.create<ConstantIndexOp>(loops.getLoc(), 1);
		herhutUnsubmitted Done Reply Inline Actions Why not `newUpperBound = cst1` here? herhut: Why not `newUpperBound = cst1` here?
		tpoppAuthorUnsubmitted Done Reply Inline Actions No real reason. I thought it would be easier for debugging purposes if each string of calculations is fully unconnected from other calculations. tpopp: No real reason. I thought it would be easier for debugging purposes if each string of…
		    for (auto idx : combinedDimensions[i]) {
		      assert(idx < normalizedUpperBounds.size() &&
		             "combined dimension index exceeds the number of loop dimensions");
		      newUpperBound = outsideBuilder.create<MulIOp>(
		          loops.getLoc(), newUpperBound, normalizedUpperBounds[idx]);
		    }
		    lowerBounds.push_back(cst0);
		    steps.push_back(cst1);
		    upperBounds.push_back(newUpperBound);
		  }

		  // Create new ParallelLoop with conversions to the original induction values.
		  auto newPloop = outsideBuilder.create<loop::ParallelOp>(
		      loops.getLoc(), lowerBounds, upperBounds, steps);
		  OpBuilder insideBuilder(newPloop.getBody(), newPloop.getBody()->begin());
		  for (unsigned i = 0; i < combinedDimensions.size(); i++) {
		rriddleUnsubmitted Done Reply Inline Actions Same here and below. rriddle: Same here and below.
		    Value previous = newPloop.getBody()->getArgument(i);
		    for (unsigned idx = 0, e = combinedDimensions[i].size(); idx < e; ++idx) {
		herhutUnsubmitted Done Reply Inline Actions A comment what this computes would help readability. herhut: A comment what this computes would help readability.
		      unsigned ivar_idx = combinedDimensions[i][idx];
		      // Strip the contribution of the dimension recovered in the previous
		      // iteration: divide by that dimension's normalized trip count. The
		      // original (non-normalized) upper bounds must not be used here because
		      // the combined iteration space was built from the normalized ones.
		      if (idx != 0)
		        previous = insideBuilder.create<SignedDivIOp>(
		            loops.getLoc(), previous,
		            normalizedUpperBounds[combinedDimensions[i][idx - 1]]);
		herhutUnsubmitted Done Reply Inline Actions Should this be the normalized upper bound? herhut: Should this be the normalized upper bound?
		tpoppAuthorUnsubmitted Done Reply Inline Actions Yes tpopp: Yes

		      // The last member of a group takes whatever remains; every other member
		      // is the remainder modulo its own normalized trip count.
		      Value iv = (idx == e - 1) ? previous
		herhutUnsubmitted Done Reply Inline Actions It would read easier for me if updating previous was also done here except for the last case. Would that make sense? herhut: It would read easier for me if updating previous was also done here except for the last case.
		tpoppAuthorUnsubmitted Done Reply Inline Actions I think this trades one mess for a different one because then it's just a different bounds check and not all indexing is happening at ivar_idx anymore. tpopp: I think this trades one mess for a different one because then it's just a different bounds…
		tpoppAuthorUnsubmitted Done Reply Inline Actions I tried to restructure it to be more readable. tpopp: I tried to restructure it to be more readable.
		                                : insideBuilder.create<SignedRemIOp>(
		                                      loops.getLoc(), previous,
		                                      normalizedUpperBounds[ivar_idx]);
		herhutUnsubmitted Done Reply Inline Actions Normalized here, too? herhut: Normalized here, too?
		tpoppAuthorUnsubmitted Done Reply Inline Actions Yes tpopp: Yes
		      replaceAllUsesInRegionWith(loops.getBody()->getArgument(ivar_idx), iv,
		                                 loops.region());
		    }
		  }

		  // Replace the old loop with the new loop: drop the old terminator, move the
		  // remaining operations in front of the new terminator, then erase the old op.
		  loops.getBody()->back().erase();
		  newPloop.getBody()->getOperations().splice(
		      Block::iterator(newPloop.getBody()->back()),
		      loops.getBody()->getOperations());
		  loops.erase();
		}

void mlir::mapLoopToProcessorIds(loop::ForOp forOp, ArrayRef<Value> processorId,		void mlir::mapLoopToProcessorIds(loop::ForOp forOp, ArrayRef<Value> processorId,
ArrayRef<Value> numProcessors) {		ArrayRef<Value> numProcessors) {
assert(processorId.size() == numProcessors.size());		assert(processorId.size() == numProcessors.size());
if (processorId.empty())		if (processorId.empty())
return;		return;

OpBuilder b(forOp);		OpBuilder b(forOp);
Location loc(forOp.getLoc());		Location loc(forOp.getLoc());
▲ Show 20 Lines • Show All 747 Lines • Show Last 20 Lines

mlir/test/Transforms/parallel-loop-coalescing.mlir

This file was added.

				// RUN: mlir-opt -parallel-loop-coalescing --coalesced-indices-0=0,3 --coalesced-indices-1=1,4 --coalesced-indices-2=2 %s | FileCheck %s

				// CHECK-LABEL: func @parallel_many_dims() {
				func @parallel_many_dims() {
				// CHECK: [[VAL_0:%.*]] = constant 0 : index
				// CHECK: [[VAL_1:%.*]] = constant 1 : index
				// CHECK: [[VAL_2:%.*]] = constant 2 : index
				// CHECK: [[VAL_3:%.*]] = constant 3 : index
				// CHECK: [[VAL_4:%.*]] = constant 4 : index
				// CHECK: [[VAL_5:%.*]] = constant 5 : index
				// CHECK: [[VAL_6:%.*]] = constant 6 : index
				// CHECK: [[VAL_7:%.*]] = constant 7 : index
				// CHECK: [[VAL_8:%.*]] = constant 8 : index
				// CHECK: [[VAL_9:%.*]] = constant 9 : index
				// CHECK: [[VAL_10:%.*]] = constant 10 : index
				// CHECK: [[VAL_11:%.*]] = constant 11 : index
				// CHECK: [[VAL_12:%.*]] = constant 12 : index
				// CHECK: [[VAL_13:%.*]] = constant 13 : index
				// CHECK: [[VAL_14:%.*]] = constant 14 : index
				// CHECK: [[VAL_15:%.*]] = subi [[VAL_5]], [[VAL_3]] : index
				// CHECK: [[VAL_16:%.*]] = constant 1 : index
				// CHECK: [[VAL_17:%.*]] = subi [[VAL_4]], [[VAL_16]] : index
				// CHECK: [[VAL_18:%.*]] = addi [[VAL_15]], [[VAL_17]] : index
				// CHECK: [[VAL_19:%.*]] = divi_signed [[VAL_18]], [[VAL_4]] : index
				// CHECK: [[VAL_20:%.*]] = constant 0 : index
				// CHECK: [[VAL_21:%.*]] = constant 1 : index
				// CHECK: [[VAL_22:%.*]] = subi [[VAL_8]], [[VAL_6]] : index
				// CHECK: [[VAL_23:%.*]] = constant 1 : index
				// CHECK: [[VAL_24:%.*]] = subi [[VAL_7]], [[VAL_23]] : index
				// CHECK: [[VAL_25:%.*]] = addi [[VAL_22]], [[VAL_24]] : index
				// CHECK: [[VAL_26:%.*]] = divi_signed [[VAL_25]], [[VAL_7]] : index
				// CHECK: [[VAL_27:%.*]] = constant 0 : index
				// CHECK: [[VAL_28:%.*]] = constant 1 : index
				// CHECK: [[VAL_29:%.*]] = subi [[VAL_11]], [[VAL_9]] : index
				// CHECK: [[VAL_30:%.*]] = constant 1 : index
				// CHECK: [[VAL_31:%.*]] = subi [[VAL_10]], [[VAL_30]] : index
				// CHECK: [[VAL_32:%.*]] = addi [[VAL_29]], [[VAL_31]] : index
				// CHECK: [[VAL_33:%.*]] = divi_signed [[VAL_32]], [[VAL_10]] : index
				// CHECK: [[VAL_34:%.*]] = constant 0 : index
				// CHECK: [[VAL_35:%.*]] = constant 1 : index
				// CHECK: [[VAL_36:%.*]] = subi [[VAL_14]], [[VAL_12]] : index
				// CHECK: [[VAL_37:%.*]] = constant 1 : index
				// CHECK: [[VAL_38:%.*]] = subi [[VAL_13]], [[VAL_37]] : index
				// CHECK: [[VAL_39:%.*]] = addi [[VAL_36]], [[VAL_38]] : index
				// CHECK: [[VAL_40:%.*]] = divi_signed [[VAL_39]], [[VAL_13]] : index
				// CHECK: [[VAL_41:%.*]] = constant 0 : index
				// CHECK: [[VAL_42:%.*]] = constant 1 : index
				// CHECK: [[VAL_43:%.*]] = constant 0 : index
				// CHECK: [[VAL_44:%.*]] = constant 1 : index
				// CHECK: [[VAL_45:%.*]] = constant 1 : index
				// CHECK: [[VAL_46:%.*]] = muli [[VAL_45]], [[VAL_2]] : index
				// CHECK: [[VAL_47:%.*]] = muli [[VAL_46]], [[VAL_33]] : index
				// CHECK: [[VAL_48:%.*]] = constant 1 : index
				// CHECK: [[VAL_49:%.*]] = muli [[VAL_48]], [[VAL_19]] : index
				// CHECK: [[VAL_50:%.*]] = muli [[VAL_49]], [[VAL_40]] : index
				// CHECK: [[VAL_51:%.*]] = constant 1 : index
				// CHECK: [[VAL_52:%.*]] = muli [[VAL_51]], [[VAL_26]] : index
				%c0 = constant 0 : index
				%c1 = constant 1 : index
				%c2 = constant 2 : index
				%c3 = constant 3 : index
				%c4 = constant 4 : index
				%c5 = constant 5 : index
				%c6 = constant 6 : index
				%c7 = constant 7 : index
				%c8 = constant 8 : index
				%c9 = constant 9 : index
				%c10 = constant 10 : index
				%c11 = constant 11 : index
				%c12 = constant 12 : index
				%c13 = constant 13 : index
				%c14 = constant 14 : index
				// CHECK: loop.parallel ([[VAL_53:%.*]], [[VAL_54:%.*]], [[VAL_55:%.*]]) = ([[VAL_43]], [[VAL_43]], [[VAL_43]]) to ([[VAL_47]], [[VAL_50]], [[VAL_52]]) step ([[VAL_44]], [[VAL_44]], [[VAL_44]]) {

				loop.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
				step (%c1, %c4, %c7, %c10, %c13) {
				// CHECK: [[VAL_56:%.*]] = remi_signed [[VAL_53]], [[VAL_2]] : index
				// CHECK: [[VAL_57:%.*]] = divi_signed [[VAL_53]], [[VAL_11]] : index
				// CHECK: [[VAL_58:%.*]] = remi_signed [[VAL_54]], [[VAL_5]] : index
				// CHECK: [[VAL_59:%.*]] = divi_signed [[VAL_54]], [[VAL_14]] : index
				// CHECK: [[VAL_60:%.*]] = muli [[VAL_59]], [[VAL_13]] : index
				// CHECK: [[VAL_61:%.*]] = addi [[VAL_60]], [[VAL_12]] : index
				// CHECK: [[VAL_62:%.*]] = muli [[VAL_57]], [[VAL_10]] : index
				// CHECK: [[VAL_63:%.*]] = addi [[VAL_62]], [[VAL_9]] : index
				// CHECK: [[VAL_64:%.*]] = muli [[VAL_55]], [[VAL_7]] : index
				// CHECK: [[VAL_65:%.*]] = addi [[VAL_64]], [[VAL_6]] : index
				// CHECK: [[VAL_66:%.*]] = muli [[VAL_58]], [[VAL_4]] : index
				// CHECK: [[VAL_67:%.*]] = addi [[VAL_66]], [[VAL_3]] : index
				// CHECK: [[VAL_68:%.*]] = "magic.op"([[VAL_56]], [[VAL_67]], [[VAL_65]], [[VAL_63]], [[VAL_61]]) : (index, index, index, index, index) -> index

				%result = "magic.op"(%i0, %i1, %i2, %i3, %i4): (index, index, index, index, index) -> index
				// CHECK: loop.yield
				}
				// CHECK: return
				return
				}

This is an archive of the discontinued LLVM Phabricator instance.

[MLIR] Add parallel loop coalescing.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251096

mlir/include/mlir/InitAllPasses.h

mlir/include/mlir/Transforms/LoopUtils.h

mlir/include/mlir/Transforms/Passes.h

mlir/lib/Transforms/CMakeLists.txt

mlir/lib/Transforms/ParallelLoopCoalescing.cpp

mlir/lib/Transforms/Utils/LoopUtils.cpp

mlir/test/Transforms/parallel-loop-coalescing.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[MLIR] Add parallel loop coalescing.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251096

mlir/include/mlir/InitAllPasses.h

mlir/include/mlir/Transforms/LoopUtils.h

mlir/include/mlir/Transforms/Passes.h

mlir/lib/Transforms/CMakeLists.txt

mlir/lib/Transforms/ParallelLoopCoalescing.cpp

mlir/lib/Transforms/Utils/LoopUtils.cpp

mlir/test/Transforms/parallel-loop-coalescing.mlir

[MLIR] Add parallel loop coalescing.
ClosedPublic