This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
mlir/
-
lib/Dialect/Linalg/Utils/
-
Dialect/
-
Linalg/
-
Utils/
-
Utils.cpp
-
test/
-
Dialect/Linalg/
-
Linalg/
-
tile-and-distribute.mlir
-
lib/Transforms/
-
Transforms/
-
TestLinalgTransforms.cpp

Differential D102079

[mlir][linalg] Restrict distribution to parallel dims
ClosedPublic

Authored by antiagainst on May 7 2021, 9:30 AM.

Download Raw Diff

Details

Reviewers

mravishankar
nicolasvasilache

Commits

rG7e71823f1deb: [mlir][linalg] Restrict distribution to parallel dims

Summary

According to the API contract, LinalgLoopDistributionOptions
expects to work on parallel iterators. When getting processor
information, only loop ranges for parallel dimensions should
be fed in. But right now after generating scf.for loop nests,
we feed in *all* loops, including the ones materialized for
reduction iterators. This can cause unexpected distribution
of reduction dimensions. This commit fixes it.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

antiagainst created this revision.May 7 2021, 9:30 AM

Herald added a reviewer: mravishankar. · View Herald TranscriptMay 7 2021, 9:30 AM

Herald added subscribers: dcaballe, cota, mravishankar and 16 others. · View Herald Transcript

antiagainst requested review of this revision.May 7 2021, 9:30 AM

Herald added a reviewer: nicolasvasilache. · View Herald TranscriptMay 7 2021, 9:30 AM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: limo1996, stephenneuendorffer, nicolasvasilache. · View Herald Transcript

Harbormaster completed remote builds in B103212: Diff 343698.May 7 2021, 9:59 AM

I am not sure how this would interact with the case where some loops are not tiled. Would be good to try out examples from here where some loops are not generated as scf.parallel when the tile size is set to 0. In the same way, if the tile size is set to 0, then the loop won't be tiled and therefore not distributed.

Essentially trying to enforce the same contract of distribution that happens with scf.parallel. The proc_ids are used for every scf.parallel encountered in the generated tiled loop nest. So if the tile size was set to 0, since it won't be lowered to an scf.parallel, it won't be distributed. That does not seem to be the same contract above.

This revision now requires changes to proceed.May 7 2021, 1:49 PM

Actually, I take it back. I think this makes sense. Since the tile size is set to 0, the loop won't be generated. Then the non-parallel loops are filtered out.

This revision is now accepted and ready to land.May 7 2021, 1:52 PM

Closed by commit rG7e71823f1deb: [mlir][linalg] Restrict distribution to parallel dims (authored by antiagainst). · Explain WhyMay 10 2021, 12:23 PM

This revision was automatically updated to reflect the committed changes.

antiagainst added a commit: rG7e71823f1deb: [mlir][linalg] Restrict distribution to parallel dims.

Revision Contents

Path

Size

mlir/

lib/

Dialect/

Linalg/

Utils/

Utils.cpp

32 lines

test/

Dialect/

Linalg/

tile-and-distribute.mlir

36 lines

lib/

Transforms/

TestLinalgTransforms.cpp

13 lines

Diff 344146

mlir/lib/Dialect/Linalg/Utils/Utils.cpp

Show First 20 Lines • Show All 199 Lines • ▼ Show 20 Lines	void GenerateLoopNest<scf::ForOp>::doit(
ArrayRef<Range> loopRanges, LinalgOp linalgOp,		ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,		ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,		function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> distributionOptions) {		Optional<LinalgLoopDistributionOptions> distributionOptions) {
auto iterArgInitValues = linalgOp.getOutputTensors();		auto iterArgInitValues = linalgOp.getOutputTensors();
// Create procInfo so it dominates loops, if appropriate.		// Create procInfo so it dominates loops, if appropriate.
OpBuilder &builder = edsc::ScopedContext::getBuilderRef();		OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
Location loc = edsc::ScopedContext::getLocation();		Location loc = edsc::ScopedContext::getLocation();
SmallVector<ProcInfo, 2> procInfo;
if (distributionOptions.hasValue())		SmallVector<ProcInfo, 4> procInfo;
procInfo = distributionOptions->procInfo(builder, loc, loopRanges);		SmallVector<DistributionMethod, 0> distributionMethod;
		if (distributionOptions.hasValue()) {
		// Collect loop ranges for parallel dimensions.
		SmallVector<Range, 2> parallelLoopRanges;
		for (auto iteratorType : enumerate(iteratorTypes))
		if (isParallelIteratorType(iteratorType.value()))
		parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);

		// Get their distribution schemes.
		distributionMethod = distributionOptions->distributionMethod;
		if (distributionMethod.size() < parallelLoopRanges.size())
		parallelLoopRanges.resize(distributionMethod.size());
		procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
		}

SmallVector<Value, 4> lbs, ubs, steps;		SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);		unpackRanges(loopRanges, lbs, ubs, steps);
LoopNest loopNest =		LoopNest loopNest =
edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);		edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);

if (!distributionOptions.hasValue() \|\| loopNest.loops.empty())		if (!distributionOptions \|\| loopNest.loops.empty())
return;		return;

// Only supports cyclic distribution for now.		// Filter out scf.for loops that were created out of parallel dimensions.
for (auto it : llvm::zip(loopNest.loops, procInfo,		SmallVector<scf::ForOp, 4> loops;
distributionOptions->distributionMethod))		for (auto iteratorType : enumerate(iteratorTypes))
		if (isParallelIteratorType(iteratorType.value()))
		loops.push_back(loopNest.loops[iteratorType.index()]);

		// Distribute - only supports cyclic distribution for now.
		for (auto it : llvm::zip(loops, procInfo, distributionMethod))
if (std::get<2>(it) == DistributionMethod::Cyclic)		if (std::get<2>(it) == DistributionMethod::Cyclic)
mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,		mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,
std::get<1>(it).nprocs);		std::get<1>(it).nprocs);
}		}

/// Specialization to build affine "for" nest.		/// Specialization to build affine "for" nest.
template <>		template <>
void GenerateLoopNest<AffineForOp>::doit(		void GenerateLoopNest<AffineForOp>::doit(
▲ Show 20 Lines • Show All 371 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/tile-and-distribute.mlir

// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file \| FileCheck %s		// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file \| FileCheck %s

func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)		func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
{		{
linalg.matmul {__internal_linalg_transform__ = "distribute1"}		linalg.matmul {__internal_linalg_transform__ = "distribute1"}
ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)		ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
outs(%c: memref<?x?xf32>)		outs(%c: memref<?x?xf32>)
return		return
}		}
// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>		// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>
// CHECK: func @gemm1(		// CHECK: func @gemm1(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: scf.for %[[ARG3:.*]] =		// CHECK: scf.for %[[ARG3:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]		// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]		// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]]		// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]]
Show All 40 Lines	func @gemm3(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
outs(%c: memref<?x?xf32>)		outs(%c: memref<?x?xf32>)
return		return
}		}
// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>		// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>
// CHECK: func @gemm3(		// CHECK: func @gemm3(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}		// CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}		// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]		// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]		// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
// CHECK: scf.parallel (%[[ARG3:.]], %[[ARG4:.]]) = (%[[LBY]], %[[LBX]]) to (%{{.}}, %{{.}}) step (%[[STEPY]], %[[STEPX]])		// CHECK: scf.parallel (%[[ARG3:.]], %[[ARG4:.]]) = (%[[LBY]], %[[LBX]]) to (%{{.}}, %{{.}}) step (%[[STEPY]], %[[STEPX]])
// CHECK: scf.for %[[ARG5:.*]] =		// CHECK: scf.for %[[ARG5:.*]] =
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]]		// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]]		// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]]
Show All 9 Lines	func @gemm4(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
outs(%c: memref<?x?xf32>)		outs(%c: memref<?x?xf32>)
return		return
}		}
// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>		// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>
// CHECK: func @gemm4(		// CHECK: func @gemm4(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[INBOUNDS:.]] = cmpi slt, %[[LBX]], %{{.}}		// CHECK: %[[INBOUNDS:.]] = cmpi slt, %[[LBX]], %{{.}}
// CHECK: scf.if %[[INBOUNDS]]		// CHECK: scf.if %[[INBOUNDS]]
// CHECK: scf.for %[[ARG3:.*]] =		// CHECK: scf.for %[[ARG3:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]		// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]		// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
Show All 11 Lines	func @gemm5(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
outs(%c: memref<?x?xf32>)		outs(%c: memref<?x?xf32>)
return		return
}		}
// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>		// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>
// CHECK: func @gemm5(		// CHECK: func @gemm5(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}		// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]		// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
// CHECK: %[[INBOUNDS:.]] = cmpi slt, %[[LBY]], %{{.}}		// CHECK: %[[INBOUNDS:.]] = cmpi slt, %[[LBY]], %{{.}}
// CHECK: scf.if %[[INBOUNDS]]		// CHECK: scf.if %[[INBOUNDS]]
// CHECK: scf.parallel (%[[ARG3:.]]) = (%[[LBX]]) to (%{{.}}) step (%[[STEPX]])		// CHECK: scf.parallel (%[[ARG3:.]]) = (%[[LBX]]) to (%{{.}}) step (%[[STEPX]])
// CHECK: scf.for %[[ARG4:.*]] =		// CHECK: scf.for %[[ARG4:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
Show All 12 Lines	func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
outs(%c: memref<?x?xf32>)		outs(%c: memref<?x?xf32>)
return		return
}		}
// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>		// CHECK-DAG: #[[MAP0:.]] = affine_map<()[s0] -> (s0 8)>
// CHECK: func @gemm6(		// CHECK: func @gemm6(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>		// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}		// CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]		// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]		// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
// CHECK: scf.parallel (%[[ARG3:.]]) = (%[[LBY]]) to (%{{.}}) step (%[[STEPY]])		// CHECK: scf.parallel (%[[ARG3:.]]) = (%[[LBY]]) to (%{{.}}) step (%[[STEPY]])
// CHECK: scf.for %[[ARG4:.*]] =		// CHECK: scf.for %[[ARG4:.*]] =
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]		// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]		// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]		// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]		// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]		// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]

// -----		// -----

// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>		// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>		// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
// CHECK: func @matmul_tensors(		// CHECK: func @matmul_tensors(
// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>		// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<?x?xf32>		// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {		// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
func @matmul_tensors(		func @matmul_tensors(
%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)		%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
-> tensor<?x?xf32> {		-> tensor<?x?xf32> {
// CHECK-DAG: %[[C8:.*]] = constant 8 : index		// CHECK-DAG: %[[C8:.*]] = constant 8 : index
// CHECK-DAG: %[[C0:.*]] = constant 0 : index		// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}		// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}		// CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}		// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}		// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]		// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]
// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]		// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]		// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]
// CHECK: %[[TD0:.]] = scf.for {{.}} to {{.}} step {{.}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {		// CHECK: %[[TD0:.]] = scf.for {{.}} to {{.}} step {{.}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]]		// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]]
// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]		// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]		// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
// CHECK: %[[TD1:.]] = scf.for {{.}} to {{.}} step {{.}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {		// CHECK: %[[TD1:.]] = scf.for {{.}} to {{.}} step {{.}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
Show All 19 Lines

mlir/test/lib/Transforms/TestLinalgTransforms.cpp

Show First 20 Lines • Show All 327 Lines • ▼ Show 20 Lines	patterns.add<LinalgPromotionPattern<MatmulOp>>(
return copyCallBackFn(b, src, dst, true);		return copyCallBackFn(b, src, dst, true);
}),		}),
LinalgTransformationFilter(Identifier::get("PROMOTE", ctx)));		LinalgTransformationFilter(Identifier::get("PROMOTE", ctx)));
}		}

template <typename IdOp, typename NProcsOp>		template <typename IdOp, typename NProcsOp>
static SmallVector<ProcInfo, 2>		static SmallVector<ProcInfo, 2>
getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges) {		getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges) {
		size_t count = std::min<size_t>(3, parallelLoopRanges.size());
		SmallVector<ProcInfo, 2> procInfo(count);
		const char *xyz[] = {"x", "y", "z"};
Type indexType = b.getIndexType();		Type indexType = b.getIndexType();
SmallVector<ProcInfo, 2> procInfo(2);		for (unsigned i = 0; i < count; ++i) {
procInfo[0] = {b.create<IdOp>(loc, indexType, b.getStringAttr("y")),		procInfo[count - 1 - i] = {
b.create<NProcsOp>(loc, indexType, b.getStringAttr("y"))};		b.create<IdOp>(loc, indexType, b.getStringAttr(xyz[i])),
procInfo[1] = {b.create<IdOp>(loc, indexType, b.getStringAttr("x")),		b.create<NProcsOp>(loc, indexType, b.getStringAttr(xyz[i]))};
b.create<NProcsOp>(loc, indexType, b.getStringAttr("x"))};		}
return procInfo;		return procInfo;
}		}

static void fillTileAndDistributePatterns(MLIRContext *context,		static void fillTileAndDistributePatterns(MLIRContext *context,
RewritePatternSet &patterns) {		RewritePatternSet &patterns) {
{		{
LinalgLoopDistributionOptions cyclicNprocsEqNiters;		LinalgLoopDistributionOptions cyclicNprocsEqNiters;
cyclicNprocsEqNiters.distributionMethod.resize(		cyclicNprocsEqNiters.distributionMethod.resize(
▲ Show 20 Lines • Show All 261 Lines • Show Last 20 Lines