This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
flang/
-
include/flang/Optimizer/HLFIR/
-
flang/
-
Optimizer/
-
HLFIR/
-
HLFIROps.h
-
HLFIROps.td
-
lib/Optimizer/HLFIR/IR/
-
Optimizer/
-
HLFIR/
-
IR/
1
HLFIROps.cpp
-
test/HLFIR/
-
HLFIR/
-
mul_transpose.f90

Differential D145959

[flang][hlfir] add matmul canonicalizer
ClosedPublic

Authored by tblah on Mar 13 2023, 10:08 AM.

Download Raw Diff

Details

Reviewers

jeanPerier
clementval
vzakhari

Commits

rGa351a60ebae4: [flang][hlfir] add matmul canonicalizer

Summary

hlfir.matmul_transpose will be lowered to a new runtime call.

A canonicalizer was chosen over the following alternatives:

- Alternative: a new pass for rewriting chained intrinsics - this would add a lot of unnecessary boilerplate.
- Alternative: including this in the HLFIR Intrinsic Lowering pass - I wanted to keep these two concerns separate rather than complicate the intrinsic lowering pass with a second purpose.

With this change, the MLIR built-in canonicalization pass should be run
before the HLFIR Intrinsic Lowering pass.

Depends on D145504, D145957

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

tblah created this revision.Mar 13 2023, 10:08 AM

Herald added projects: Restricted Project, Restricted Project. · View Herald TranscriptMar 13 2023, 10:08 AM

Herald added subscribers: sunshaoce, bzcheeseman, mehdi_amini, rriddle. · View Herald Transcript

tblah requested review of this revision.Mar 13 2023, 10:08 AM

Herald added subscribers: stephenneuendorffer, jdoerfert. · View Herald TranscriptMar 13 2023, 10:08 AM

tblah added a child revision: D145960: [flang] add fused matmul-transpose to the runtime.Mar 13 2023, 10:09 AM

Harbormaster completed remote builds in B219092: Diff 504731.Mar 13 2023, 1:22 PM

tschuett added a subscriber: tschuett.Mar 14 2023, 1:29 AM

jeanPerier added a reviewer: vzakhari.Mar 14 2023, 2:55 AM

I think it makes sense to do this as a canonicalization pattern.

This revision is now accepted and ready to land.Mar 14 2023, 2:56 AM

vzakhari added inline comments.Mar 15 2023, 7:54 PM

flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
653	There has got to be some verification that the two uses are exactly `hlfir.matmul` and `hlfir.destroy`; otherwise, the transformation will silently produce incorrect code if the HLFIR contract changes at some point.

Thanks for your comment.

Updated to check that the uses are exactly 'hlfir.matmul' and 'hlfir.destroy'.

Harbormaster completed remote builds in B219817: Diff 505748.Mar 16 2023, 3:42 AM

Thank you for the update!

Closed by commit rGa351a60ebae4: [flang][hlfir] add matmul canonicalizer (authored by tblah). · Explain WhyMar 17 2023, 2:31 AM

This revision was automatically updated to reflect the committed changes.

tblah added a commit: rGa351a60ebae4: [flang][hlfir] add matmul canonicalizer.

Revision Contents

Path

Size

flang/

include/

flang/

Optimizer/

HLFIR/

HLFIROps.h

1 line

HLFIROps.td

3 lines

lib/

Optimizer/

HLFIR/

IR/

HLFIROps.cpp

37 lines

test/

HLFIR/

mul_transpose.f90

5 lines

Diff 504731

flang/include/flang/Optimizer/HLFIR/HLFIROps.h

	Show All 9 Lines
	#define FORTRAN_OPTIMIZER_HLFIR_HLFIROPS_H			#define FORTRAN_OPTIMIZER_HLFIR_HLFIROPS_H

	#include "flang/Optimizer/Dialect/FIRAttr.h"			#include "flang/Optimizer/Dialect/FIRAttr.h"
	#include "flang/Optimizer/Dialect/FIRDialect.h"			#include "flang/Optimizer/Dialect/FIRDialect.h"
	#include "flang/Optimizer/Dialect/FIRType.h"			#include "flang/Optimizer/Dialect/FIRType.h"
	#include "flang/Optimizer/Dialect/FortranVariableInterface.h"			#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
	#include "flang/Optimizer/HLFIR/HLFIRDialect.h"			#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
	#include "mlir/Dialect/Arith/IR/Arith.h"			#include "mlir/Dialect/Arith/IR/Arith.h"
				#include "mlir/IR/PatternMatch.h"
	#include "mlir/Interfaces/InferTypeOpInterface.h"			#include "mlir/Interfaces/InferTypeOpInterface.h"
	#include "mlir/Interfaces/SideEffectInterfaces.h"			#include "mlir/Interfaces/SideEffectInterfaces.h"

	#define GET_OP_CLASSES			#define GET_OP_CLASSES
	#include "flang/Optimizer/HLFIR/HLFIROps.h.inc"			#include "flang/Optimizer/HLFIR/HLFIROps.h.inc"

	#endif // FORTRAN_OPTIMIZER_HLFIR_HLFIROPS_H			#endif // FORTRAN_OPTIMIZER_HLFIR_HLFIROPS_H

flang/include/flang/Optimizer/HLFIR/HLFIROps.td

Show First 20 Lines • Show All 376 Lines • ▼ Show 20 Lines	def hlfir_MatmulOp : hlfir_Op<"matmul",
);		);

let results = (outs hlfir_ExprType);		let results = (outs hlfir_ExprType);

let assemblyFormat = [{		let assemblyFormat = [{
$lhs $rhs attr-dict `:` functional-type(operands, results)		$lhs $rhs attr-dict `:` functional-type(operands, results)
}];		}];

		// MATMUL(TRANSPOSE(...), ...) => hlfir.matmul_transpose
		let hasCanonicalizeMethod = 1;

let hasVerifier = 1;		let hasVerifier = 1;
}		}

def hlfir_TransposeOp : hlfir_Op<"transpose", []> {		def hlfir_TransposeOp : hlfir_Op<"transpose", []> {
let summary = "TRANSPOSE transformational intrinsic";		let summary = "TRANSPOSE transformational intrinsic";
let description = [{		let description = [{
Transpose a rank 2 array		Transpose a rank 2 array
}];		}];
▲ Show 20 Lines • Show All 371 Lines • Show Last 20 Lines

flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp

Show All 14 Lines
#include "flang/Optimizer/Dialect/FIRType.h"		#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"		#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "mlir/IR/Builders.h"		#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"		#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"		#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/Matchers.h"		#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"		#include "mlir/IR/OpImplementation.h"
#include "llvm/ADT/TypeSwitch.h"		#include "llvm/ADT/TypeSwitch.h"
		#include <iterator>
#include <optional>		#include <optional>
#include <tuple>		#include <tuple>

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// DeclareOp		// DeclareOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// Is this a fir.[ref/ptr/heap]<fir.[box/class]<fir.heap<T>>> type?		/// Is this a fir.[ref/ptr/heap]<fir.[box/class]<fir.heap<T>>> type?
▲ Show 20 Lines • Show All 602 Lines • ▼ Show 20 Lines	mlir::LogicalResult hlfir::MatmulOp::verify() {
if (resultShape[0] != expectedResultShape[0])		if (resultShape[0] != expectedResultShape[0])
return emitOpError("incorrect result shape");		return emitOpError("incorrect result shape");
if (resultShape.size() == 2 && resultShape[1] != expectedResultShape[1])		if (resultShape.size() == 2 && resultShape[1] != expectedResultShape[1])
return emitOpError("incorrect result shape");		return emitOpError("incorrect result shape");

return mlir::success();		return mlir::success();
}		}

		mlir::LogicalResult
		hlfir::MatmulOp::canonicalize(MatmulOp matmulOp,
		mlir::PatternRewriter &rewriter) {
		auto getNumUses = [](mlir::Value val) {
		auto users = val.getUsers();
		return std::distance(users.begin(), users.end());
		};
		mlir::Value lhs = matmulOp.getLhs();

		// Rewrite MATMUL(TRANSPOSE(lhs), rhs) => hlfir.matmul_transpose lhs, rhs
		if (auto transposeOp = lhs.getDefiningOp<hlfir::TransposeOp>()) {
		// 2 uses: one for the hlfir.matmul and one for hlfir.destroy
		[Inline comment by vzakhari — Unsubmitted, Not Done] There has got to be some verification that the two uses are exactly `hlfir.matmul` and `hlfir.destroy`; otherwise, the transformation will silently produce incorrect code if the HLFIR contract changes at some point.
		if (getNumUses(transposeOp.getResult()) <= 2) {
		mlir::Location loc = matmulOp.getLoc();
		mlir::Type resultTy = matmulOp.getResult().getType();
		auto matmulTransposeOp = rewriter.create<hlfir::MatmulTransposeOp>(
		loc, resultTy, transposeOp.getArray(), matmulOp.getRhs());

		// we don't need to remove any hlfir.destroy because it will be needed for
		// the new intrinsic result anyway
		rewriter.replaceOp(matmulOp, matmulTransposeOp.getResult());

		// but we do need to get rid of the hlfir.destroy for the hlfir.transpose
		// result (which is entirely removed)
		for (mlir::Operation *user : transposeOp->getResult(0).getUsers())
		if (auto destroyOp = mlir::dyn_cast_or_null<hlfir::DestroyOp>(user))
		rewriter.eraseOp(destroyOp);
		rewriter.eraseOp(transposeOp);

		return mlir::success();
		}
		}

		return mlir::failure();
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// TransposeOp		// TransposeOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

mlir::LogicalResult hlfir::TransposeOp::verify() {		mlir::LogicalResult hlfir::TransposeOp::verify() {
mlir::Value array = getArray();		mlir::Value array = getArray();
fir::SequenceType arrayTy =		fir::SequenceType arrayTy =
hlfir::getFortranElementOrSequenceType(array.getType())		hlfir::getFortranElementOrSequenceType(array.getType())
▲ Show 20 Lines • Show All 195 Lines • Show Last 20 Lines

flang/test/HLFIR/mul_transpose.f90

	! RUN: bbc -emit-fir -hlfir %s -o - | FileCheck --check-prefix CHECK-BASE --check-prefix CHECK-ALL %s			! RUN: bbc -emit-fir -hlfir %s -o - | FileCheck --check-prefix CHECK-BASE --check-prefix CHECK-ALL %s
				! RUN: bbc -emit-fir -hlfir %s -o - | fir-opt --canonicalize | FileCheck --check-prefix CHECK-CANONICAL --check-prefix CHECK-ALL %s
	! RUN: bbc -emit-fir -hlfir %s -o - | fir-opt --lower-hlfir-intrinsics | FileCheck --check-prefix CHECK-LOWERING --check-prefix CHECK-ALL %s			! RUN: bbc -emit-fir -hlfir %s -o - | fir-opt --lower-hlfir-intrinsics | FileCheck --check-prefix CHECK-LOWERING --check-prefix CHECK-ALL %s
	! RUN: bbc -emit-fir -hlfir %s -o - | fir-opt --lower-hlfir-intrinsics | fir-opt --bufferize-hlfir | FileCheck --check-prefix CHECK-BUFFERING --check-prefix CHECK-ALL %s			! RUN: bbc -emit-fir -hlfir %s -o - | fir-opt --lower-hlfir-intrinsics | fir-opt --bufferize-hlfir | FileCheck --check-prefix CHECK-BUFFERING --check-prefix CHECK-ALL %s

	! Test passing a hlfir.expr from one intrinsic to another			! Test passing a hlfir.expr from one intrinsic to another
	subroutine mul_transpose(a, b, res)			subroutine mul_transpose(a, b, res)
	real a(2,1), b(2,2), res(1,2)			real a(2,1), b(2,2), res(1,2)
	res = MATMUL(TRANSPOSE(a), b)			res = MATMUL(TRANSPOSE(a), b)
	endsubroutine			endsubroutine

	! CHECK-ALL-LABEL: func.func @_QPmul_transpose			! CHECK-ALL-LABEL: func.func @_QPmul_transpose
	! CHECK-ALL: %[[A_ARG:.*]]: !fir.ref<!fir.array<2x1xf32>> {fir.bindc_name = "a"}			! CHECK-ALL: %[[A_ARG:.*]]: !fir.ref<!fir.array<2x1xf32>> {fir.bindc_name = "a"}
	! CHECK-ALL: %[[B_ARG:.*]]: !fir.ref<!fir.array<2x2xf32>> {fir.bindc_name = "b"}			! CHECK-ALL: %[[B_ARG:.*]]: !fir.ref<!fir.array<2x2xf32>> {fir.bindc_name = "b"}
	! CHECK-ALL: %[[RES_ARG:.*]]: !fir.ref<!fir.array<1x2xf32>> {fir.bindc_name = "res"}			! CHECK-ALL: %[[RES_ARG:.*]]: !fir.ref<!fir.array<1x2xf32>> {fir.bindc_name = "res"}
	! CHECK-ALL-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]			! CHECK-ALL-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]
	! CHECK-ALL-DAG: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ARG]]			! CHECK-ALL-DAG: %[[B_DECL:.*]]:2 = hlfir.declare %[[B_ARG]]
	! CHECK-ALL-DAG: %[[RES_DECL:.*]]:2 = hlfir.declare %[[RES_ARG]]			! CHECK-ALL-DAG: %[[RES_DECL:.*]]:2 = hlfir.declare %[[RES_ARG]]

	! CHECK-BASE: %[[TRANSPOSE_RES:.*]] = hlfir.transpose %[[A_DECL]]#0 : (!fir.ref<!fir.array<2x1xf32>>) -> !hlfir.expr<1x2xf32>			! CHECK-BASE: %[[TRANSPOSE_RES:.*]] = hlfir.transpose %[[A_DECL]]#0 : (!fir.ref<!fir.array<2x1xf32>>) -> !hlfir.expr<1x2xf32>
	! CHECK-BASE-NEXT: %[[MATMUL_RES:.*]] = hlfir.matmul %[[TRANSPOSE_RES]] %[[B_DECL]]#0 {{.*}}: (!hlfir.expr<1x2xf32>, !fir.ref<!fir.array<2x2xf32>>) -> !hlfir.expr<1x2xf32>			! CHECK-BASE-NEXT: %[[MATMUL_RES:.*]] = hlfir.matmul %[[TRANSPOSE_RES]] %[[B_DECL]]#0 {{.*}}: (!hlfir.expr<1x2xf32>, !fir.ref<!fir.array<2x2xf32>>) -> !hlfir.expr<1x2xf32>
	! CHECK-BASE-NEXT: hlfir.assign %[[MATMUL_RES]] to %[[RES_DECL]]#0 : !hlfir.expr<1x2xf32>, !fir.ref<!fir.array<1x2xf32>>			! CHECK-BASE-NEXT: hlfir.assign %[[MATMUL_RES]] to %[[RES_DECL]]#0 : !hlfir.expr<1x2xf32>, !fir.ref<!fir.array<1x2xf32>>
	! CHECK-BASE-NEXT: hlfir.destroy %[[MATMUL_RES]]			! CHECK-BASE-NEXT: hlfir.destroy %[[MATMUL_RES]]
	! CHECK-BASE-NEXT: hlfir.destroy %[[TRANSPOSE_RES]]			! CHECK-BASE-NEXT: hlfir.destroy %[[TRANSPOSE_RES]]

				! CHECK-CANONICAL-NEXT: %[[CHAIN_RES:.*]] = hlfir.matmul_transpose %[[A_DECL]]#0 %[[B_DECL]]#0 : (!fir.ref<!fir.array<2x1xf32>>, !fir.ref<!fir.array<2x2xf32>>) -> !hlfir.expr<1x2xf32>
				! CHECK-CANONICAL-NEXT: hlfir.assign %[[CHAIN_RES]] to %[[RES_DECL]]#0 : !hlfir.expr<1x2xf32>, !fir.ref<!fir.array<1x2xf32>>
				! CHECK-CANONICAL-NEXT: hlfir.destroy %[[CHAIN_RES]]

	! CHECK-LOWERING: %[[A_BOX:.*]] = fir.embox %[[A_DECL]]#1(%{{.*}})			! CHECK-LOWERING: %[[A_BOX:.*]] = fir.embox %[[A_DECL]]#1(%{{.*}})
	! CHECK-LOWERING: %[[TRANSPOSE_CONV_RES:.*]] = fir.convert %[[TRANSPOSE_RES_BOX:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>			! CHECK-LOWERING: %[[TRANSPOSE_CONV_RES:.*]] = fir.convert %[[TRANSPOSE_RES_BOX:.*]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
	! CHECK-LOWERING: %[[A_BOX_CONV:.*]] = fir.convert %[[A_BOX]] : (!fir.box<!fir.array<2x1xf32>>) -> !fir.box<none>			! CHECK-LOWERING: %[[A_BOX_CONV:.*]] = fir.convert %[[A_BOX]] : (!fir.box<!fir.array<2x1xf32>>) -> !fir.box<none>
	! CHECK-LOWERING: fir.call @_FortranATranspose(%[[TRANSPOSE_CONV_RES]], %[[A_BOX_CONV]], %[[LOC_STR1:.*]], %[[LOC_N1:.*]])			! CHECK-LOWERING: fir.call @_FortranATranspose(%[[TRANSPOSE_CONV_RES]], %[[A_BOX_CONV]], %[[LOC_STR1:.*]], %[[LOC_N1:.*]])
	! CHECK-LOWERING: %[[TRANSPOSE_RES_LD:.*]] = fir.load %[[TRANSPOSE_RES_BOX:.*]]			! CHECK-LOWERING: %[[TRANSPOSE_RES_LD:.*]] = fir.load %[[TRANSPOSE_RES_BOX:.*]]
	! CHECK-LOWERING: %[[TRANSPOSE_RES_ADDR:.*]] = fir.box_addr %[[TRANSPOSE_RES_LD]]			! CHECK-LOWERING: %[[TRANSPOSE_RES_ADDR:.*]] = fir.box_addr %[[TRANSPOSE_RES_LD]]
	! CHECK-LOWERING: %[[TRANSPOSE_RES_VAR:.*]]:2 = hlfir.declare %[[TRANSPOSE_RES_ADDR]]({{.*}}) {uniq_name = ".tmp.intrinsic_result"}			! CHECK-LOWERING: %[[TRANSPOSE_RES_VAR:.*]]:2 = hlfir.declare %[[TRANSPOSE_RES_ADDR]]({{.*}}) {uniq_name = ".tmp.intrinsic_result"}
	! CHECK-LOWERING: %[[TRUE:.*]] = arith.constant true			! CHECK-LOWERING: %[[TRUE:.*]] = arith.constant true
	▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[flang][hlfir] add matmul canonicalizerClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 504731

flang/include/flang/Optimizer/HLFIR/HLFIROps.h

flang/include/flang/Optimizer/HLFIR/HLFIROps.td

flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp

flang/test/HLFIR/mul_transpose.f90

[flang][hlfir] add matmul canonicalizer
ClosedPublic