Diff 363610

mlir/include/mlir/Dialect/SCF/Passes.h

	Show All 18 Lines

	/// Creates a pass that bufferizes the SCF dialect.			/// Creates a pass that bufferizes the SCF dialect.
	std::unique_ptr<Pass> createSCFBufferizePass();			std::unique_ptr<Pass> createSCFBufferizePass();

	/// Creates a pass that specializes for loop for unrolling and			/// Creates a pass that specializes for loop for unrolling and
	/// vectorization.			/// vectorization.
	std::unique_ptr<Pass> createForLoopSpecializationPass();			std::unique_ptr<Pass> createForLoopSpecializationPass();

				/// Creates a pass that peels for loops at their upper bounds for
				/// better vectorization.
				std::unique_ptr<Pass> createForLoopPeelingPass();

	/// Creates a loop fusion pass which fuses parallel loops.			/// Creates a loop fusion pass which fuses parallel loops.
	std::unique_ptr<Pass> createParallelLoopFusionPass();			std::unique_ptr<Pass> createParallelLoopFusionPass();

	/// Creates a pass that specializes parallel loop for unrolling and			/// Creates a pass that specializes parallel loop for unrolling and
	/// vectorization.			/// vectorization.
	std::unique_ptr<Pass> createParallelLoopSpecializationPass();			std::unique_ptr<Pass> createParallelLoopSpecializationPass();

	/// Creates a pass which tiles innermost parallel loops.			/// Creates a pass which tiles innermost parallel loops.
	Show All 18 Lines

mlir/include/mlir/Dialect/SCF/Passes.td

	Show All 11 Lines
	include "mlir/Pass/PassBase.td"			include "mlir/Pass/PassBase.td"

	def SCFBufferize : FunctionPass<"scf-bufferize"> {			def SCFBufferize : FunctionPass<"scf-bufferize"> {
	let summary = "Bufferize the scf dialect.";			let summary = "Bufferize the scf dialect.";
	let constructor = "mlir::createSCFBufferizePass()";			let constructor = "mlir::createSCFBufferizePass()";
	let dependentDialects = ["memref::MemRefDialect"];			let dependentDialects = ["memref::MemRefDialect"];
	}			}

				def SCFForLoopPeeling
				: FunctionPass<"for-loop-peeling"> {
				let summary = "Peel `for` loops at their upper bounds.";
				[Inline comment, mehdi_amini (unsubmitted, not done)]: The surrounding passes aren't well documented either, but can you fill in the "description" field here?
				let constructor = "mlir::createForLoopPeelingPass()";
				let dependentDialects = ["AffineDialect"];
				}

	def SCFForLoopSpecialization			def SCFForLoopSpecialization
	: FunctionPass<"for-loop-specialization"> {			: FunctionPass<"for-loop-specialization"> {
	let summary = "Specialize `for` loops for vectorization";			let summary = "Specialize `for` loops for vectorization";
	let constructor = "mlir::createForLoopSpecializationPass()";			let constructor = "mlir::createForLoopSpecializationPass()";
	}			}

	def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> {			def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> {
	let summary = "Fuse adjacent parallel loops";			let summary = "Fuse adjacent parallel loops";
	Show All 27 Lines

mlir/include/mlir/Dialect/SCF/Transforms.h

	Show All 12 Lines
	#ifndef MLIR_DIALECT_SCF_TRANSFORMS_H_			#ifndef MLIR_DIALECT_SCF_TRANSFORMS_H_
	#define MLIR_DIALECT_SCF_TRANSFORMS_H_			#define MLIR_DIALECT_SCF_TRANSFORMS_H_

	#include "llvm/ADT/ArrayRef.h"			#include "llvm/ADT/ArrayRef.h"

	namespace mlir {			namespace mlir {

	class ConversionTarget;			class ConversionTarget;
				struct LogicalResult;
	class MLIRContext;			class MLIRContext;
	class Region;			class Region;
				class RewriterBase;
	class TypeConverter;			class TypeConverter;
	class RewritePatternSet;			class RewritePatternSet;
	using OwningRewritePatternList = RewritePatternSet;			using OwningRewritePatternList = RewritePatternSet;
	class Operation;			class Operation;

	namespace scf {			namespace scf {

				// Only IfOp needs a new forward declaration here; ForOp is already
				// forward-declared further down in this namespace.
				class IfOp;
	class ParallelOp;			class ParallelOp;
	class ForOp;			class ForOp;

	/// Fuses all adjacent scf.parallel operations with identical bounds and step			/// Fuses all adjacent scf.parallel operations with identical bounds and step
	/// into one scf.parallel operations. Uses a naive aliasing and dependency			/// into one scf.parallel operations. Uses a naive aliasing and dependency
	/// analysis.			/// analysis.
	void naivelyFuseParallelOps(Region &region);			void naivelyFuseParallelOps(Region &region);

				/// Rewrite a for loop with bounds/step that potentially do not divide evenly
				/// into a for loop where the step divides the iteration space evenly, followed
				/// by an scf.if for the last (partial) iteration (if any). This transformation
				/// is called "loop peeling".
				///
				/// Other patterns can simplify/canonicalize operations in the body of the loop
				/// and the scf.if. This is beneficial for a wide range of transformations such
				/// as vectorization or loop tiling.
				///
				/// E.g., assuming a lower bound of 0 (for illustration purposes):
				/// ```
				/// scf.for %iv = %c0 to %ub step %c4 {
				/// (loop body)
				/// }
				/// ```
				/// is rewritten into the following pseudo IR:
				/// ```
				/// %newUb = %ub - (%ub mod %c4)
				/// scf.for %iv = %c0 to %newUb step %c4 {
				/// (loop body)
				/// }
				/// scf.if %newUb < %ub {
				[Inline comment, mehdi_amini (unsubmitted, not done)]: Wouldn't this potentially need more than one iteration here (up to step-1, I think)? Also, isn't this transformation just what the literature refers to as "loop peeling"? If so, please name it accordingly, including in a more descriptive commit title (like "Add a loop peeling pass to enable vectorization" or something like that).
				[Inline comment, springerm (author, unsubmitted, done)]: I think one iteration should be enough. The pattern is designed in such a way that it rounds down to the next multiple of the step size. (Assuming lb = 0. In the more general case: newUb = ub - (ub - lb) % step.) Renamed the pass etc. to "loop peeling".
				[Inline comment, mehdi_amini (unsubmitted, not done)]: Oh right, because the body is already executing an entire (potentially partial) step.
				/// (loop body)
				/// }
				/// ```
				///
				/// This function rewrites the given scf.for loop in-place and creates a new
				/// scf.if operation (returned via `ifOp`) for the last iteration.
				///
				/// TODO: Simplify affine.min ops inside the new loop/if statement.
				LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp);

	/// Tile a parallel loop of the form			/// Tile a parallel loop of the form
	/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)			/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
	/// step (%arg4, %arg5)			/// step (%arg4, %arg5)
	///			///
	/// into			/// into
				[Inline comment, bondhugula (unsubmitted, done)]: It should be possible to do this transformation without having to erase `forOp`. Could you do this in place, so that you won't need the output argument `mainLoop`? Operands of `for` ops can be updated. E.g., the affine.for transformation utilities update the op in place and avoid erasure/allocation wherever possible.
				[Inline comment, springerm (author, unsubmitted, done)]: Good idea.
	/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)			/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
	/// step (%arg4*tileSize[0],			/// step (%arg4*tileSize[0],
	/// %arg5*tileSize[1])			/// %arg5*tileSize[1])
	/// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)			/// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0)
	/// min(tileSize[1], %arg3-%j1))			/// min(tileSize[1], %arg3-%j1))
	/// step (%arg4, %arg5)			/// step (%arg4, %arg5)
	/// The old loop is replaced with the new one.			/// The old loop is replaced with the new one.
	///			///
	▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

Show All 9 Lines
// vectorization.		// vectorization.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "PassDetail.h"		#include "PassDetail.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"		#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/SCF/Passes.h"		#include "mlir/Dialect/SCF/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"		#include "mlir/Dialect/SCF/SCF.h"
		#include "mlir/Dialect/SCF/Transforms.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"		#include "mlir/Dialect/StandardOps/IR/Ops.h"
		#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineExpr.h"		#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/BlockAndValueMapping.h"		#include "mlir/IR/BlockAndValueMapping.h"
		#include "mlir/IR/PatternMatch.h"
		#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
		#include "llvm/ADT/DenseMap.h"

using namespace mlir;		using namespace mlir;
using scf::ForOp;		using scf::ForOp;
using scf::ParallelOp;		using scf::ParallelOp;

/// Rewrite a parallel loop with bounds defined by an affine.min with a constant		/// Rewrite a parallel loop with bounds defined by an affine.min with a constant
/// into 2 loops after checking if the bounds are equal to that constant. This		/// into 2 loops after checking if the bounds are equal to that constant. This
/// is beneficial if the loop will almost always have the constant bound and		/// is beneficial if the loop will almost always have the constant bound and
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	Value cond =
b.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, bound, constant);		b.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, bound, constant);
map.map(bound, constant);		map.map(bound, constant);
auto ifOp = b.create<scf::IfOp>(op.getLoc(), cond, /withElseRegion=/true);		auto ifOp = b.create<scf::IfOp>(op.getLoc(), cond, /withElseRegion=/true);
ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);		ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
ifOp.getElseBodyBuilder().clone(*op.getOperation());		ifOp.getElseBodyBuilder().clone(*op.getOperation());
op.erase();		op.erase();
}		}

		/// Rewrite a for loop with bounds/step that potentially do not divide evenly
		/// into a for loop where the step divides the iteration space evenly, followed
		/// by an scf.if for the last (partial) iteration (if any).
		///
		/// `forOp` is updated in place: only its upper bound operand is replaced by
		/// `%ub - (%ub - %lb) mod %step`. The scf.if that holds a clone of the loop
		/// body for the remaining partial iteration is created right after the loop
		/// and returned through `ifOp`; all uses of the loop results are redirected to
		/// the results of that scf.if.
		///
		/// Returns failure (before creating or modifying any IR) when the step is a
		/// constant <= 1 or when constant bounds/step already divide evenly. In that
		/// case `ifOp` is left unassigned.
		LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
		                                     scf::IfOp &ifOp) {
		  RewriterBase::InsertionGuard guard(b);
		  auto lbInt = getConstantIntValue(forOp.lowerBound());
		  auto ubInt = getConstantIntValue(forOp.upperBound());
		  auto stepInt = getConstantIntValue(forOp.step());

		  // No specialization necessary if step size is 1. A constant step that is
		  // zero or negative is rejected as well, so that the modulo below can never
		  // divide by zero.
		  if (stepInt && *stepInt <= 1)
		    return failure();
		  // No specialization necessary if step already divides upper bound evenly.
		  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
		    return failure();

		  auto loc = forOp.getLoc();
		  AffineExpr dim0, dim1, dim2;
		  bindDims(b.getContext(), dim0, dim1, dim2);
		  // New upper bound: %ub - (%ub - %lb) mod %step
		  auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)});
		  // The new bound is an operand of the loop, so it has to be created in
		  // front of it regardless of where the caller left the insertion point.
		  b.setInsertionPoint(forOp);
		  Value splitBound = b.createOrFold<AffineApplyOp>(
		      loc, modMap,
		      ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});

		  // Set new upper loop bound.
		  Value previousUb = forOp.upperBound();
		  b.updateRootInPlace(forOp,
		                      [&]() { forOp.upperBoundMutable().assign(splitBound); });
		  b.setInsertionPointAfter(forOp);

		  // Do we need one more iteration?
		  Value hasMoreIter =
		      b.create<CmpIOp>(loc, CmpIPredicate::slt, splitBound, previousUb);

		  // Create IfOp for last iteration.
		  auto resultTypes = llvm::to_vector<4>(
		      llvm::map_range(forOp.initArgs(), [](Value v) { return v.getType(); }));
		  ifOp = b.create<scf::IfOp>(loc, resultTypes, hasMoreIter,
		                             /*withElseRegion=*/!resultTypes.empty());
		  // Redirect existing users first; every use of the loop results created
		  // below (inside the scf.if) is meant to keep referring to the loop itself.
		  forOp.replaceAllUsesWith(ifOp->getResults());

		  // Build then case: a clone of the loop body in which the induction variable
		  // is the split bound and the iter_args are the results of the main loop.
		  BlockAndValueMapping bvm;
		  bvm.map(forOp.region().getArgument(0), splitBound);
		  for (auto it : llvm::zip(forOp.region().getArguments().drop_front(),
		                           forOp->getResults())) {
		    bvm.map(std::get<0>(it), std::get<1>(it));
		  }
		  b.cloneRegionBefore(forOp.region(), ifOp.thenRegion(),
		                      ifOp.thenRegion().begin(), bvm);
		  // Build else case: without a partial iteration the loop results are
		  // forwarded unchanged.
		  if (!resultTypes.empty())
		    ifOp.getElseBodyBuilder().create<scf::YieldOp>(loc, forOp->getResults());

		  return success();
		}

		/// Name of the unit attribute that is attached to an scf.for once it has been
		/// peeled.
		static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";

		namespace {
		/// Rewrite pattern that applies `peelForLoop` to scf.for ops which do not
		/// carry the peeled-loop label yet, and labels them afterwards.
		struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
		  using OpRewritePattern<ForOp>::OpRewritePattern;

		  LogicalResult matchAndRewrite(ForOp forOp,
		                                PatternRewriter &rewriter) const override {
		    // A labelled loop was handled before; an unlabelled one is peeled now.
		    // Either a present label or a failed peeling means "no match".
		    const bool alreadyPeeled = forOp->hasAttr(kPeeledLoopLabel);
		    scf::IfOp peeledIteration;
		    if (alreadyPeeled || failed(peelForLoop(rewriter, forOp, peeledIteration)))
		      return failure();

		    // Attach the label so that this loop is not rewritten a second time.
		    auto attachLabel = [&]() {
		      forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
		    };
		    rewriter.updateRootInPlace(forOp, attachLabel);
		    return success();
		  }
		};
		} // namespace

namespace {		namespace {
struct ParallelLoopSpecialization		struct ParallelLoopSpecialization
: public SCFParallelLoopSpecializationBase<ParallelLoopSpecialization> {		: public SCFParallelLoopSpecializationBase<ParallelLoopSpecialization> {
void runOnFunction() override {		void runOnFunction() override {
getFunction().walk(		getFunction().walk(
[](ParallelOp op) { specializeParallelLoopForUnrolling(op); });		[](ParallelOp op) { specializeParallelLoopForUnrolling(op); });
}		}
};		};

struct ForLoopSpecialization		struct ForLoopSpecialization
: public SCFForLoopSpecializationBase<ForLoopSpecialization> {		: public SCFForLoopSpecializationBase<ForLoopSpecialization> {
void runOnFunction() override {		void runOnFunction() override {
getFunction().walk([](ForOp op) { specializeForLoopForUnrolling(op); });		getFunction().walk([](ForOp op) { specializeForLoopForUnrolling(op); });
}		}
};		};

		/// Function pass that runs `ForLoopPeelingPattern` greedily over the function
		/// and afterwards removes the bookkeeping label from all scf.for ops.
		struct ForLoopPeeling : public SCFForLoopPeelingBase<ForLoopPeeling> {
		  void runOnFunction() override {
		    FuncOp func = getFunction();
		    MLIRContext *context = func.getContext();

		    RewritePatternSet peelingPatterns(context);
		    peelingPatterns.add<ForLoopPeelingPattern>(context);
		    // The driver's result is deliberately discarded.
		    (void)applyPatternsAndFoldGreedily(func, std::move(peelingPatterns));

		    // Drop the marker from every scf.for in the function.
		    func.walk([](ForOp loop) { loop->removeAttr(kPeeledLoopLabel); });
		  }
		};
} // namespace		} // namespace

std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() {		std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() {
return std::make_unique<ParallelLoopSpecialization>();		return std::make_unique<ParallelLoopSpecialization>();
}		}

std::unique_ptr<Pass> mlir::createForLoopSpecializationPass() {		std::unique_ptr<Pass> mlir::createForLoopSpecializationPass() {
return std::make_unique<ForLoopSpecialization>();		return std::make_unique<ForLoopSpecialization>();
}		}

		/// Factory for the scf.for loop peeling pass.
		std::unique_ptr<Pass> mlir::createForLoopPeelingPass() {
		  std::unique_ptr<Pass> peelingPass = std::make_unique<ForLoopPeeling>();
		  return peelingPass;
		}

mlir/test/Dialect/SCF/for-loop-peeling.mlir

This file was added.

				// RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s

				// Lower bound, upper bound and step are all function arguments: the loop is
				// expected to get a new upper bound and to be followed by an scf.if.
				// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
				// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
				// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))>
				// CHECK: func @fully_dynamic_bounds(
				// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
				// CHECK: %[[C0_I32:.*]] = constant 0 : i32
				// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
				// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
				// CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
				// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]]
				// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
				// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
				// CHECK: scf.yield %[[ADD]]
				// CHECK: }
				// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
				// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
				// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]]
				// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
				// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
				// CHECK: scf.yield %[[ADD2]]
				// CHECK: } else {
				// CHECK: scf.yield %[[LOOP]]
				// CHECK: }
				// CHECK: return %[[RESULT]]
				#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
				func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
				%c0 = constant 0 : i32
				%r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0) -> i32 {
				%s = affine.min #map(%ub, %iv)[%step]
				%casted = index_cast %s : index to i32
				%0 = addi %arg, %casted : i32
				scf.yield %0 : i32
				}
				return %r : i32
				}

				// -----

				// All bounds are constants (0 to 17 step 4): the main loop is expected to run
				// to 16 and the partial iteration to fold into a constant addition.
				// CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)>
				// CHECK: func @fully_static_bounds(
				// CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
				// CHECK-DAG: %[[C1_I32:.*]] = constant 1 : i32
				// CHECK-DAG: %[[C0:.*]] = constant 0 : index
				// CHECK-DAG: %[[C4:.*]] = constant 4 : index
				// CHECK-DAG: %[[C16:.*]] = constant 16 : index
				// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
				// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
				// CHECK: %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]])
				// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
				// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
				// CHECK: scf.yield %[[ADD]]
				// CHECK: }
				// CHECK: %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32
				// CHECK: return %[[RESULT]]
				#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
				func @fully_static_bounds() -> i32 {
				%c0_i32 = constant 0 : i32
				%lb = constant 0 : index
				%step = constant 4 : index
				%ub = constant 17 : index
				%r = scf.for %iv = %lb to %ub step %step
				iter_args(%arg = %c0_i32) -> i32 {
				%s = affine.min #map(%ub, %iv)[%step]
				%casted = index_cast %s : index to i32
				%0 = addi %arg, %casted : i32
				scf.yield %0 : i32
				}
				return %r : i32
				}

				// -----

				// Constant lower bound (0) and step (4) with a dynamic upper bound: the new
				// upper bound is expected to be the upper bound rounded down to a multiple
				// of 4.
				// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
				// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
				// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
				// CHECK: func @dynamic_upper_bound(
				// CHECK-SAME: %[[UB:.*]]: index
				// CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
				// CHECK-DAG: %[[C0:.*]] = constant 0 : index
				// CHECK-DAG: %[[C4:.*]] = constant 4 : index
				// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
				// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
				// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
				// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
				// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
				// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
				// CHECK: scf.yield %[[ADD]]
				// CHECK: }
				// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
				// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
				// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
				// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
				// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
				// CHECK: scf.yield %[[ADD2]]
				// CHECK: } else {
				// CHECK: scf.yield %[[LOOP]]
				// CHECK: }
				// CHECK: return %[[RESULT]]
				#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
				func @dynamic_upper_bound(%ub : index) -> i32 {
				%c0_i32 = constant 0 : i32
				%lb = constant 0 : index
				%step = constant 4 : index
				%r = scf.for %iv = %lb to %ub step %step
				iter_args(%arg = %c0_i32) -> i32 {
				%s = affine.min #map(%ub, %iv)[%step]
				%casted = index_cast %s : index to i32
				%0 = addi %arg, %casted : i32
				scf.yield %0 : i32
				}
				return %r : i32
				}

				// -----

				// Loop without iter_args that accumulates through a memref: the scf.if for
				// the partial iteration is expected to have no results and no else branch.
				// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
				// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
				// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
				// CHECK: func @no_loop_results(
				// CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
				// CHECK-DAG: %[[C0:.*]] = constant 0 : index
				// CHECK-DAG: %[[C4:.*]] = constant 4 : index
				// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
				// CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
				// CHECK: %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
				// CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][]
				// CHECK: %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
				// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32
				// CHECK: memref.store %[[ADD]], %[[MEMREF]]
				// CHECK: }
				// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
				// CHECK: scf.if %[[HAS_MORE]] {
				// CHECK: %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
				// CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
				// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
				// CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]]
				// CHECK: memref.store %[[ADD2]], %[[MEMREF]]
				// CHECK: }
				// CHECK: return
				#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
				func @no_loop_results(%ub : index, %d : memref<i32>) {
				%c0_i32 = constant 0 : i32
				%lb = constant 0 : index
				%step = constant 4 : index
				scf.for %iv = %lb to %ub step %step {
				%s = affine.min #map(%ub, %iv)[%step]
				%r = memref.load %d[] : memref<i32>
				%casted = index_cast %s : index to i32
				%0 = addi %r, %casted : i32
				memref.store %0, %d[] : memref<i32>
				}
				return
				}

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

Show First 20 Lines • Show All 1,473 Lines • ▼ Show 20 Lines	cc_library(
srcs = glob([		srcs = glob([
"lib/Dialect/SCF/Transforms/*.cpp",		"lib/Dialect/SCF/Transforms/*.cpp",
"lib/Dialect/SCF/Transforms/*.h",		"lib/Dialect/SCF/Transforms/*.h",
]),		]),
hdrs = ["include/mlir/Dialect/SCF/Passes.h"],		hdrs = ["include/mlir/Dialect/SCF/Passes.h"],
includes = ["include"],		includes = ["include"],
deps = [		deps = [
":Affine",		":Affine",
		":DialectUtils",
":IR",		":IR",
":MemRefDialect",		":MemRefDialect",
":Pass",		":Pass",
":SCFDialect",		":SCFDialect",
":SCFPassIncGen",		":SCFPassIncGen",
":StandardOps",		":StandardOps",
":Support",		":Support",
":Transforms",		":Transforms",
▲ Show 20 Lines • Show All 5,479 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][SCF] Peel scf.for loops for even step division
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363610

mlir/include/mlir/Dialect/SCF/Passes.h

mlir/include/mlir/Dialect/SCF/Passes.td

mlir/include/mlir/Dialect/SCF/Transforms.h

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

mlir/test/Dialect/SCF/for-loop-peeling.mlir

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][SCF] Peel scf.for loops for even step division (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363610

mlir/include/mlir/Dialect/SCF/Passes.h

mlir/include/mlir/Dialect/SCF/Passes.td

mlir/include/mlir/Dialect/SCF/Transforms.h

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

mlir/test/Dialect/SCF/for-loop-peeling.mlir

utils/bazel/llvm-project-overlay/mlir/BUILD.bazel

[mlir][SCF] Peel scf.for loops for even step division
ClosedPublic