The mapper assigns annotations to loop.parallel operations that
are compatible with the loop-to-GPU mapping pass. The outermost
parallel loop is mapped to grid dimensions, the next nested parallel
loop to block dimensions, and all remaining loops are mapped to
sequential loops.
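For illustration, a minimal sketch of the kind of nest the mapper targets. The function name, bounds, and body are made up for this sketch, and the annotation attributes the mapper attaches are omitted; see the mapping.mlir test in this patch for the actual output.

```mlir
// Hypothetical two-level loop.parallel nest. The mapper is expected to
// annotate the outer loop with grid dimensions, the nested loop with
// block dimensions, and anything nested deeper as sequential.
func @map_example(%lb : index, %ub : index) {
  %c1 = constant 1 : index
  // Outer nest level: grid (block-id) dimensions.
  loop.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) {
    // First nested level: block (thread-id) dimensions.
    loop.parallel (%k) = (%lb) to (%ub) step (%c1) {
      // Any loop nested below this point: sequential.
    }
  }
  return
}
```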
Diff Detail
Repository: rG LLVM Github Monorepo
Event Timeline
Mostly nits from my side. Feel free to address and land.
mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h

| Line | Comment |
|---|---|
| 2 | Copy-pasta in the header. |
| 27 | Do you care about the op being a FuncOp? I'd just go with a body region here. |
| 51 | Nit: missing newline at the end of the file. |

mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp

| Line | Comment |
|---|---|
| 2 | Also copy-pasta. |
| 35 | Nit: the convention is to declare file-local types in an anonymous namespace, but file-local functions as `static` outside of such namespaces. |
| 45 | This looks like it would be better placed next to the code that consumes hardware ids, that is, the ploop->gpu.launch transformation. |
| 67–68 | `b.getNamedAttr("processor", b.getI64IntegerAttr(...))` |
| 75 | Nit: let's factor out reused strings into named constants. |

mlir/test/Dialect/GPU/mapping.mlir

| Line | Comment |
|---|---|
| 57 | This test made me wonder about the semantic choice of mapping an inner loop in a loop nest to "sequential" execution as opposed to "don't care about order". Saying it is sequential implies the first iteration must complete before the second starts, and so on, which is neither the semantics of the original loop nest nor that of the generated code, which does not include a synchronization after the nested parallel loop. |

mlir/test/lib/Transforms/TestGpuParallelLoopMapping.cpp

| Line | Comment |
|---|---|
| 2 | Copy-pasta. |
Review comments.
mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp

| Line | Comment |
|---|---|
| 45 | This is just an elaborate way of counting. The consumer does not use these levels. |

mlir/test/Dialect/GPU/mapping.mlir

| Line | Comment |
|---|---|
| 57 | Read this more as "it is mapped to a sequential loop": the attribute describes the generated code rather than a specific execution order. The overall execution order is a result of the overall mapping and constrains the original parallel execution semantics (which assumed no particular order) to one specific order, which is always valid. See the sketch below. |
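To make the point concrete, a minimal before/after fragment. It assumes that a sequential-mapped loop.parallel ends up as an ordinary loop.for during the later ploop->gpu.launch lowering; this is an assumption for illustration, not the verbatim output of that pass.

```mlir
// Before mapping: loop.parallel iterations may execute in any order.
func @before(%lb : index, %ub : index, %step : index) {
  loop.parallel (%k) = (%lb) to (%ub) step (%step) {
    // body
  }
  return
}

// After a "sequential" mapping is applied, the loop is emitted as an
// ordinary loop. Running the iterations in order is one valid schedule
// of the original unordered semantics, so the mapping only constrains,
// never violates, the parallel loop's meaning.
func @after(%lb : index, %ub : index, %step : index) {
  loop.for %k = %lb to %ub step %step {
    // body
  }
  return
}
```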