This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
mlir/
-
include/mlir/Dialect/Linalg/IR/
-
mlir/
-
Dialect/
-
Linalg/
-
IR/
-
LinalgNamedStructuredOps.yaml
-
test/Dialect/Linalg/
-
Dialect/
-
Linalg/
-
named-ops.mlir

Differential D141430

[mlir][linalg] Add named op for matmul_transpose_b
Accepted · Public

Authored by ThomasRaoux on Jan 10 2023, 2:00 PM.

Download Raw Diff

Details

Reviewers

nicolasvasilache
hanchung

Commits

rGd0bf55cfdbe1: [mlir]][linalg] Add named op for matmul_transpose_b

Summary

Matmul with a transposed RHS operand allows better memory access
patterns on several architectures, including common GPUs. Having a named
op for it allows this kind of matmul to be handled in a more explicit way.

Diff Detail

Event Timeline

ThomasRaoux created this revision. · Jan 10 2023, 2:00 PM

Herald added a reviewer: hanchung. · View Herald Transcript · Jan 10 2023, 2:00 PM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: hanchung, Moerafaat, bzcheeseman and 22 others. · View Herald Transcript

ThomasRaoux requested review of this revision. · Jan 10 2023, 2:00 PM

Herald added a project: Restricted Project. · View Herald Transcript · Jan 10 2023, 2:00 PM

Herald added subscribers: limo1996, stephenneuendorffer. · View Herald Transcript

hanchung accepted this revision. · Jan 10 2023, 2:21 PM

This revision is now accepted and ready to land. · Jan 10 2023, 2:21 PM

Harbormaster completed remote builds in B206929: Diff 487984. · Jan 10 2023, 7:48 PM

Closed by commit rGd0bf55cfdbe1: [mlir]][linalg] Add named op for matmul_transpose_b (authored by ThomasRaoux). · Explain WhyJan 11 2023, 11:50 AM

This revision was automatically updated to reflect the committed changes.

ThomasRaoux added a commit: rGd0bf55cfdbe1: [mlir]][linalg] Add named op for matmul_transpose_b.

Hello, it appears that the changes made only include the generated yaml file and additional tests. However, it seems that core_named_ops.py was not included. Could you please confirm this?

Herald added subscribers: bviyer, awarzynski. · View Herald Transcript · May 6 2023, 12:26 PM

Yes, this is indeed a problem: the source of truth is the .py file.

This revision is now accepted and ready to land. · May 7 2023, 12:20 AM

OK, I can send a fix.

kon72 mentioned this in D150059: [mlir][linalg] Add missing op to match the generated file. · May 7 2023, 2:09 AM

nicolasvasilache mentioned this in rG9a592d82a0da: [mlir][linalg] Add missing op to match the generated file. · Jun 26 2023, 12:55 PM

Matt added a subscriber: Matt. · Jul 19 2023, 12:53 PM

Revision Contents

Path

Size

mlir/

include/

mlir/

Dialect/

Linalg/

IR/

LinalgNamedStructuredOps.yaml

142 lines

test/

Dialect/

Linalg/

named-ops.mlir

22 lines

Diff 487984

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

Show First 20 Lines • Show All 474 Lines • ▼ Show 20 Lines	value: !ScalarExpression
kind: type		kind: type
fn_name: cast_signed		fn_name: cast_signed
type_var: AccumType		type_var: AccumType
operands:		operands:
- !ScalarExpression		- !ScalarExpression
scalar_arg: rhs		scalar_arg: rhs
--- !LinalgOpConfig		--- !LinalgOpConfig
metadata: !LinalgOpMetadata		metadata: !LinalgOpMetadata
		name: matmul_transpose_b
		cpp_class_name: MatmulTransposeBOp
		doc: |-
		Performs a matrix multiplication of two 2D inputs with rhs operand transposed.

		Numeric casting is performed on the operands to the inner multiply, promoting
		them to the same data type as the accumulator/output.
		implements:
		- LinalgContractionOpInterface
		structured_op: !LinalgStructuredOpConfig
		args:
		- !LinalgOperandDefConfig
		name: A
		kind: input_tensor
		type_var: T1
		shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
		- !LinalgOperandDefConfig
		name: B
		kind: input_tensor
		type_var: T2
		shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
		- !LinalgOperandDefConfig
		name: C
		kind: output_tensor
		type_var: U
		shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
		- !LinalgOperandDefConfig
		name: cast
		kind: type_fn_attr
		default_fn: cast_signed
		indexing_maps: !LinalgIndexingMapsConfig
		static_indexing_maps:
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
		iterator_types:
		- parallel
		- parallel
		- reduction
		assignments:
		- !ScalarAssign
		arg: C
		value: !ScalarExpression
		scalar_fn:
		kind: binary
		fn_name: add
		operands:
		- !ScalarExpression
		scalar_arg: C
		- !ScalarExpression
		scalar_fn:
		kind: binary
		fn_name: mul
		operands:
		- !ScalarExpression
		scalar_fn:
		kind: type
		attr_name: cast
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: A
		- !ScalarExpression
		scalar_fn:
		kind: type
		attr_name: cast
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: B
		--- !LinalgOpConfig
		metadata: !LinalgOpMetadata
name: batch_matmul		name: batch_matmul
cpp_class_name: BatchMatmulOp		cpp_class_name: BatchMatmulOp
doc: |-		doc: |-
Performs a batched matrix multiplication of two 3D inputs.		Performs a batched matrix multiplication of two 3D inputs.

Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
implements:		implements:
▲ Show 20 Lines • Show All 158 Lines • ▼ Show 20 Lines	value: !ScalarExpression
kind: type		kind: type
fn_name: cast_signed		fn_name: cast_signed
type_var: U		type_var: U
operands:		operands:
- !ScalarExpression		- !ScalarExpression
scalar_arg: BZp		scalar_arg: BZp
--- !LinalgOpConfig		--- !LinalgOpConfig
metadata: !LinalgOpMetadata		metadata: !LinalgOpMetadata
		name: batch_matmul_transpose_b
		cpp_class_name: BatchMatmulTransposeBOp
		doc: |-
		Performs a batched matrix multiplication of two 3D inputs where rhs operand has its non-batch
		dimensions transposed.

		Numeric casting is performed on the operands to the inner multiply, promoting
		them to the same data type as the accumulator/output.
		implements:
		- LinalgContractionOpInterface
		structured_op: !LinalgStructuredOpConfig
		args:
		- !LinalgOperandDefConfig
		name: A
		kind: input_tensor
		type_var: T1
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
		- !LinalgOperandDefConfig
		name: B
		kind: input_tensor
		type_var: T2
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
		- !LinalgOperandDefConfig
		name: C
		kind: output_tensor
		type_var: U
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
		indexing_maps: !LinalgIndexingMapsConfig
		static_indexing_maps:
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
		iterator_types:
		- parallel
		- parallel
		- parallel
		- reduction
		assignments:
		- !ScalarAssign
		arg: C
		value: !ScalarExpression
		scalar_fn:
		kind: binary
		fn_name: add
		operands:
		- !ScalarExpression
		scalar_arg: C
		- !ScalarExpression
		scalar_fn:
		kind: binary
		fn_name: mul
		operands:
		- !ScalarExpression
		scalar_fn:
		kind: type
		fn_name: cast_signed
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: A
		- !ScalarExpression
		scalar_fn:
		kind: type
		fn_name: cast_signed
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: B
		--- !LinalgOpConfig
		metadata: !LinalgOpMetadata
name: batch_reduce_matmul		name: batch_reduce_matmul
cpp_class_name: BatchReduceMatmulOp		cpp_class_name: BatchReduceMatmulOp
doc: |-		doc: |-
Performs a batch-reduce matrix multiplication of two 3D inputs.		Performs a batch-reduce matrix multiplication of two 3D inputs.
The partial multiplication results are reduced into a 2D output.		The partial multiplication results are reduced into a 2D output.

Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
▲ Show 20 Lines • Show All 3,919 Lines • Show Last 20 Lines

mlir/test/Dialect/Linalg/named-ops.mlir

	Show First 20 Lines • Show All 1,064 Lines • ▼ Show 20 Lines

	func.func @batch_reduce_matmul(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?xf32>) {			func.func @batch_reduce_matmul(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?xf32>) {
	// CHECK: linalg.batch_reduce_matmul			// CHECK: linalg.batch_reduce_matmul
	// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?xf32>, memref<?x?x?xf32>)			// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?xf32>, memref<?x?x?xf32>)
	// CHECK-SAME: outs(%{{.+}} : memref<?x?xf32>)			// CHECK-SAME: outs(%{{.+}} : memref<?x?xf32>)
	linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref<?x?x?xf32>, memref<?x?x?xf32>) outs(%arg2: memref<?x?xf32>)			linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref<?x?x?xf32>, memref<?x?x?xf32>) outs(%arg2: memref<?x?xf32>)
	return			return
	}			}

				// -----

				// CHECK-LABEL: func @matmul_transpose_b
				// CHECK: linalg.matmul_transpose_b
				// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<3x5xf32>, memref<7x5xf32>)
				// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
				func.func @matmul_transpose_b(%arg0: memref<3x5xf32>, %arg1: memref<7x5xf32>, %arg2: memref<3x7xf32>) {
				linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) outs(%arg2: memref<3x7xf32>)
				return
				}

				// -----

				// CHECK-LABEL: func @batchmatmul_transpose_b
				// CHECK: linalg.batch_matmul_transpose_b
				// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x3x5xf32>, memref<2x7x5xf32>)
				// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
				func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5xf32>, %arg2: memref<2x3x7xf32>) {
				linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) outs(%arg2: memref<2x3x7xf32>)
				return
				}