Diff 355658

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines	value: !ScalarExpression
- !ScalarExpression		- !ScalarExpression
symbolic_cast:		symbolic_cast:
type_var: U		type_var: U
operands:		operands:
- !ScalarExpression		- !ScalarExpression
scalar_arg: B		scalar_arg: B
--- !LinalgOpConfig		--- !LinalgOpConfig
metadata: !LinalgOpMetadata		metadata: !LinalgOpMetadata
		name: quantized_matmul
		cpp_class_name: QuantizedMatmulOp
		doc: |-
		Performs a matrix multiplication of two 2D inputs.

		Numeric casting is performed on the operands to the inner multiply, promoting
		them to the same data type as the accumulator/output. The quantized variant
		includes zero-point adjustments for the left and right operands of the
		matmul.
		structured_op: !LinalgStructuredOpConfig
		args:
		- !LinalgOperandDefConfig
		name: A
		usage: InputOperand
		type_var: T1
		shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
		- !LinalgOperandDefConfig
		name: B
		usage: InputOperand
		type_var: T2
		shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
		- !LinalgOperandDefConfig
		name: AZp
		usage: InputOperand
		type_var: I32
		- !LinalgOperandDefConfig
		name: BZp
		usage: InputOperand
		type_var: I32
		- !LinalgOperandDefConfig
		name: C
		usage: OutputOperand
		type_var: U
		shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
		indexing_maps: !LinalgIndexingMapsConfig
		static_indexing_maps:
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
		- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
		iterator_types:
		- parallel
		- parallel
		- reduction
		assignments:
		- !ScalarAssign
		arg: C
		value: !ScalarExpression
		scalar_apply:
		fn_name: add
		operands:
		- !ScalarExpression
		scalar_arg: C
		- !ScalarExpression
		scalar_apply:
		fn_name: mul
		operands:
		- !ScalarExpression
		scalar_apply:
		fn_name: sub
		operands:
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: A
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: AZp
		- !ScalarExpression
		scalar_apply:
		fn_name: sub
		operands:
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: B
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: BZp
		--- !LinalgOpConfig
		metadata: !LinalgOpMetadata
name: batch_matmul		name: batch_matmul
cpp_class_name: BatchMatmulOp		cpp_class_name: BatchMatmulOp
doc: |-		doc: |-
Performs a batched matrix multiplication of two 3D inputs.		Performs a batched matrix multiplication of two 3D inputs.

Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
implements:		implements:
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	value: !ScalarExpression
- !ScalarExpression		- !ScalarExpression
symbolic_cast:		symbolic_cast:
type_var: U		type_var: U
operands:		operands:
- !ScalarExpression		- !ScalarExpression
scalar_arg: B		scalar_arg: B
--- !LinalgOpConfig		--- !LinalgOpConfig
metadata: !LinalgOpMetadata		metadata: !LinalgOpMetadata
		name: quantized_batch_matmul
		cpp_class_name: QuantizedBatchMatmulOp
		doc: |-
		Performs a batched matrix multiplication of two 3D inputs.

		Numeric casting is performed on the operands to the inner multiply, promoting
		them to the same data type as the accumulator/output. The quantized variant
		includes zero-point adjustments for the left and right operands of the
		matmul.
		structured_op: !LinalgStructuredOpConfig
		args:
		- !LinalgOperandDefConfig
		name: A
		usage: InputOperand
		type_var: T1
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
		- !LinalgOperandDefConfig
		name: B
		usage: InputOperand
		type_var: T2
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
		- !LinalgOperandDefConfig
		name: AZp
		usage: InputOperand
		type_var: I32
		- !LinalgOperandDefConfig
		name: BZp
		usage: InputOperand
		type_var: I32
		- !LinalgOperandDefConfig
		name: C
		usage: OutputOperand
		type_var: U
		shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
		indexing_maps: !LinalgIndexingMapsConfig
		static_indexing_maps:
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
		- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
		iterator_types:
		- parallel
		- parallel
		- parallel
		- reduction
		assignments:
		- !ScalarAssign
		arg: C
		value: !ScalarExpression
		scalar_apply:
		fn_name: add
		operands:
		- !ScalarExpression
		scalar_arg: C
		- !ScalarExpression
		scalar_apply:
		fn_name: mul
		operands:
		- !ScalarExpression
		scalar_apply:
		fn_name: sub
		operands:
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: A
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: AZp
		- !ScalarExpression
		scalar_apply:
		fn_name: sub
		operands:
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: B
		- !ScalarExpression
		symbolic_cast:
		type_var: U
		operands:
		- !ScalarExpression
		scalar_arg: BZp
		--- !LinalgOpConfig
		metadata: !LinalgOpMetadata
name: matvec		name: matvec
cpp_class_name: MatvecOp		cpp_class_name: MatvecOp
doc: |-		doc: |-
Performs a matrix-vector multiplication.		Performs a matrix-vector multiplication.

Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
implements:		implements:
▲ Show 20 Lines • Show All 477 Lines • Show Last 20 Lines

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

Show First 20 Lines • Show All 1,013 Lines • ▼ Show 20 Lines	matchAndRewrite(tosa::MatMulOp op, ArrayRef<Value> args,
auto outputTy = op.getType().cast<ShapedType>();		auto outputTy = op.getType().cast<ShapedType>();
auto outputElementTy = outputTy.getElementType();		auto outputElementTy = outputTy.getElementType();
auto zeroAttr = rewriter.getZeroAttr(outputElementTy);		auto zeroAttr = rewriter.getZeroAttr(outputElementTy);
Value zero = rewriter.create<ConstantOp>(loc, zeroAttr);		Value zero = rewriter.create<ConstantOp>(loc, zeroAttr);
auto initTensor = rewriter.create<linalg::InitTensorOp>(		auto initTensor = rewriter.create<linalg::InitTensorOp>(
loc, outputTy.getShape(), outputTy.getElementType());		loc, outputTy.getShape(), outputTy.getElementType());
Value zeroTensor =		Value zeroTensor =
rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);		rewriter.create<linalg::FillOp>(loc, zero, initTensor).getResult(0);
		if (!op.quantization_info()) {
rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(		rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()},		op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()},
ValueRange{zeroTensor});		ValueRange{zeroTensor});
return success();		return success();
}		}

		auto quantizationInfo = op.quantization_info().getValue();
		auto aZp = rewriter.create<ConstantOp>(
		loc, rewriter.getI32IntegerAttr(
		quantizationInfo.a_zp().getValue().getSExtValue()));
		auto bZp = rewriter.create<ConstantOp>(
		loc, rewriter.getI32IntegerAttr(
		quantizationInfo.b_zp().getValue().getSExtValue()));
		rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
		op, TypeRange{op.getType()},
		ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor);

		return success();
		}
};		};

class FullyConnectedConverter		class FullyConnectedConverter
: public OpConversionPattern<tosa::FullyConnectedOp> {		: public OpConversionPattern<tosa::FullyConnectedOp> {
public:		public:
using OpConversionPattern<tosa::FullyConnectedOp>::OpConversionPattern;		using OpConversionPattern<tosa::FullyConnectedOp>::OpConversionPattern;
LogicalResult		LogicalResult
matchAndRewrite(tosa::FullyConnectedOp op, ArrayRef<Value> args,		matchAndRewrite(tosa::FullyConnectedOp op, ArrayRef<Value> args,
ConversionPatternRewriter &rewriter) const final {		ConversionPatternRewriter &rewriter) const final {
Location loc = op.getLoc();		Location loc = op.getLoc();
auto outputTy = op.getType().cast<ShapedType>();		auto outputTy = op.getType().cast<ShapedType>();
auto input = op.input();		auto input = op.input();
auto weight = op.weight();		auto weight = op.weight();
auto bias = op.bias();		auto bias = op.bias();

auto weightTy = weight.getType().cast<ShapedType>();		auto weightTy = weight.getType().cast<ShapedType>();
auto biasTy = bias.getType().cast<ShapedType>();

auto weightShape = weightTy.getShape();		auto weightShape = weightTy.getShape();

if (op.quantization_info())
return failure();

// Creating maps for the output of MatMul and the bias		// Creating maps for the output of MatMul and the bias
SmallVector<AffineMap, 4> indexingMaps;		SmallVector<AffineMap, 4> indexingMaps;

// Broadcast the bias.		// Broadcast the bias.
indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,		indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
{rewriter.getAffineDimExpr(1)},		{rewriter.getAffineDimExpr(1)},
rewriter.getContext()));		rewriter.getContext()));

Show All 18 Lines	matchAndRewrite(tosa::FullyConnectedOp op, ArrayRef<Value> args,

SmallVector<int64_t> permutation{1, 0};		SmallVector<int64_t> permutation{1, 0};
auto permutationAttr = DenseIntElementsAttr::get(		auto permutationAttr = DenseIntElementsAttr::get(
RankedTensorType::get({2}, rewriter.getI64Type()), permutation);		RankedTensorType::get({2}, rewriter.getI64Type()), permutation);
Value permutationValue = rewriter.create<ConstantOp>(loc, permutationAttr);		Value permutationValue = rewriter.create<ConstantOp>(loc, permutationAttr);

SmallVector<int64_t> newWeightShape{weightShape[1], weightShape[0]};		SmallVector<int64_t> newWeightShape{weightShape[1], weightShape[0]};
Type newWeightTy =		Type newWeightTy =
RankedTensorType::get(newWeightShape, biasTy.getElementType());		RankedTensorType::get(newWeightShape, weightTy.getElementType());

Value transposedWeight = rewriter.create<tosa::TransposeOp>(		Value transposedWeight = rewriter.create<tosa::TransposeOp>(
loc, newWeightTy, weight, permutationValue);		loc, newWeightTy, weight, permutationValue);

		if (!op.quantization_info()) {
rewriter.replaceOpWithNewOp<linalg::MatmulOp>(		rewriter.replaceOpWithNewOp<linalg::MatmulOp>(
op, TypeRange{op.getType()}, ValueRange{input, transposedWeight},		op, TypeRange{op.getType()}, ValueRange{input, transposedWeight},
linalgOp);		linalgOp);
return success();		return success();
}		}

		auto quantizationInfo = op.quantization_info().getValue();
		auto inputZp = rewriter.create<ConstantOp>(
		loc, rewriter.getI32IntegerAttr(
		quantizationInfo.input_zp().getValue().getSExtValue()));
		auto outputZp = rewriter.create<ConstantOp>(
		loc, rewriter.getI32IntegerAttr(
		quantizationInfo.weight_zp().getValue().getSExtValue()));
		rewriter.replaceOpWithNewOp<linalg::QuantizedMatmulOp>(
		op, TypeRange{op.getType()},
		ValueRange{input, transposedWeight, inputZp, outputZp}, linalgOp);

		return success();
		}
};		};

class ReshapeConverter : public OpConversionPattern<tosa::ReshapeOp> {		class ReshapeConverter : public OpConversionPattern<tosa::ReshapeOp> {
public:		public:
using OpConversionPattern<tosa::ReshapeOp>::OpConversionPattern;		using OpConversionPattern<tosa::ReshapeOp>::OpConversionPattern;

LogicalResult		LogicalResult
matchAndRewrite(tosa::ReshapeOp reshape, ArrayRef<Value> args,		matchAndRewrite(tosa::ReshapeOp reshape, ArrayRef<Value> args,
▲ Show 20 Lines • Show All 1,376 Lines • Show Last 20 Lines

mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py

Show All 15 Lines	def matmul(
Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
"""		"""
implements(ContractionOpInterface)		implements(ContractionOpInterface)
C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n])		C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n])


@linalg_structured_op		@linalg_structured_op
		def quantized_matmul(
		A=TensorDef(T1, S.M, S.K),
		B=TensorDef(T2, S.K, S.N),
		AZp=ScalarDef(I32),
		BZp=ScalarDef(I32),
		C=TensorDef(U, S.M, S.N, output=True)):
		"""Performs a matrix multiplication of two 2D inputs.

		Numeric casting is performed on the operands to the inner multiply, promoting
		stellaraccidentUnsubmitted Not Done Reply Inline Actions Suggest rephrasing: "This quantized variant" (and typo on "varient"). Here and in the batch matmul description. stellaraccident: Suggest rephrasing: "This quantized variant" (and typo on "varient"). Here and in the batch…
		them to the same data type as the accumulator/output. The quantized variant
		includes zero-point adjustments for the left and right operands of the
		matmul.
		"""
		asaadaldienUnsubmitted Done Reply Inline Actions you need to specify the iteration domain e.g `domain(D.m, D.n, D.k)` asaadaldien: you need to specify the iteration domain e.g `domain(D.m, D.n, D.k)`
		C[D.m, D.n] += (cast(U, A[D.m, D.k]) - cast(U, AZp)) * (cast(U, B[D.k, D.n]) - cast(U, BZp))

		@linalg_structured_op
def batch_matmul(		def batch_matmul(
A=TensorDef(T1, Batch, S.M, S.K),		A=TensorDef(T1, Batch, S.M, S.K),
B=TensorDef(T2, Batch, S.K, S.N),		B=TensorDef(T2, Batch, S.K, S.N),
C=TensorDef(U, Batch, S.M, S.N, output=True)):		C=TensorDef(U, Batch, S.M, S.N, output=True)):
"""Performs a batched matrix multiplication of two 3D inputs.		"""Performs a batched matrix multiplication of two 3D inputs.

Numeric casting is performed on the operands to the inner multiply, promoting		Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.		them to the same data type as the accumulator/output.
"""		"""
implements(ContractionOpInterface)		implements(ContractionOpInterface)
C[D.b, D.m, D.n] += cast(U, A[D.b, D.m, D.k]) * cast(U, B[D.b, D.k, D.n])		C[D.b, D.m, D.n] += cast(U, A[D.b, D.m, D.k]) * cast(U, B[D.b, D.k, D.n])

		@linalg_structured_op
		def quantized_batch_matmul(
		A=TensorDef(T1, Batch, S.M, S.K),
		B=TensorDef(T2, Batch, S.K, S.N),
		AZp=ScalarDef(I32),
		BZp=ScalarDef(I32),
		C=TensorDef(U, Batch, S.M, S.N, output=True)):
		"""Performs a batched matrix multiplication of two 3D inputs.

		Numeric casting is performed on the operands to the inner multiply, promoting
		them to the same data type as the accumulator/output. The quantized variant
		includes zero-point adjustments for the left and right operands of the
		matmul.
		"""
		asaadaldienUnsubmitted Done Reply Inline Actions ditto, iteration domain ? asaadaldien: ditto, iteration domain ?
		C[D.b, D.m, D.n] += (cast(U, A[D.b, D.m, D.k]) - cast(U, AZp)) * (cast(U, B[D.b, D.k, D.n]) - cast(U, BZp))


@linalg_structured_op		@linalg_structured_op
def matvec(		def matvec(
A=TensorDef(T1, S.M, S.N),		A=TensorDef(T1, S.M, S.N),
y=TensorDef(T2, S.N),		y=TensorDef(T2, S.N),
x=TensorDef(U, S.M, output=True)):		x=TensorDef(U, S.M, output=True)):
"""Performs a matrix-vector multiplication.		"""Performs a matrix-vector multiplication.

▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

Show First 20 Lines • Show All 849 Lines • ▼ Show 20 Lines	func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>, %arg2: tensor<1x6xf32>) -> (tensor<1x5x6xf32>) {
// CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>		// CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>		// CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>)		%0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>)
return %0 : tensor<1x5x6xf32>		return %0 : tensor<1x5x6xf32>
}		}

// -----		// -----


		// CHECK-LABEL: @matmul_quantized
		func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
		// CHECK: [[C0:%.+]] = constant 0
		// CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
		// CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32>
		// CHECK: [[ONE:%.+]] = constant 1
		// CHECK: [[TWO:%.+]] = constant 2
		// CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
		%0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>)
		return %0 : tensor<1x5x6xi32>
		}

		// -----

// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>		// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>		// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1, d0)>		// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-LABEL: @fully_connected		// CHECK-LABEL: @fully_connected
func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {		func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
// CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]		// CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2 : tensor<6xf32>) outs([[INITB]] : tensor<5x6xf32>) {		// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2 : tensor<6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
// CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32):		// CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32):
// CHECK: linalg.yield [[IN]] : f32		// CHECK: linalg.yield [[IN]] : f32
// CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6]		// CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6]
// CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]]		// CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]]
// CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32):		// CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32):
// CHECK: linalg.yield [[IN]] : f32		// CHECK: linalg.yield [[IN]] : f32
// CHECK: linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[GENERIC]] : tensor<5x6xf32>) -> tensor<5x6xf32>		// CHECK: linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[GENERIC]] : tensor<5x6xf32>) -> tensor<5x6xf32>
%0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>)		%0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>)
return %0 : tensor<5x6xf32>		return %0 : tensor<5x6xf32>
}		}

// -----		// -----

		// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
		// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
		// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1, d0)>

		// CHECK-LABEL: @quantized_fully_connected
		func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
		// CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
		// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2 : tensor<6xi32>) outs([[INITB]] : tensor<5x6xi32>) {
		// CHECK: ^bb0([[IN:%.+]]: i32, [[UNUSED:%.+]]: i32):
		// CHECK: linalg.yield [[IN]] : i32
		// CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6]
		// CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xi8>) outs([[INITT]]
		// CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
		// CHECK: linalg.yield [[IN]] : i8
		// CHECK: [[ONE:%.+]] = constant 1
		// CHECK: [[TWO:%.+]] = constant 2
		// CHECK: linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[GENERIC]] : tensor<5x6xi32>) -> tensor<5x6xi32>
		%0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>)
		return %0 : tensor<5x6xi32>
		}

		// -----

func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {		func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
%0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>		%0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
// CHECK: [[INDEX0:%.+]] = constant 0 : index		// CHECK: [[INDEX0:%.+]] = constant 0 : index
// CHECK: [[INDEX1:%.+]] = constant 1 : index		// CHECK: [[INDEX1:%.+]] = constant 1 : index
// CHECK: [[ROW0:%.+]] = constant 0 : index		// CHECK: [[ROW0:%.+]] = constant 0 : index
// CHECK: [[LOW0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX0]]]		// CHECK: [[LOW0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX0]]]
// CHECK: [[HIGH0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX1]]]		// CHECK: [[HIGH0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX1]]]
// CHECK: [[LOW0_IDX:%.+]] = index_cast %0		// CHECK: [[LOW0_IDX:%.+]] = index_cast %0
▲ Show 20 Lines • Show All 615 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tosa] Add quantized lowering for matmul and fully_connected
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 355658

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tosa] Add quantized lowering for matmul and fully_connected
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 355658

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

[mlir][tosa] Add quantized lowering for matmul and fully_connected
ClosedPublic