This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
1
AArch64ISelLowering.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
1
sve-mull.ll

Differential D148248

[AArch64][SVE] Generate smull/umull instead of sve v2i64 mul
ClosedPublic

Authored by dmgreen on Apr 13 2023, 9:34 AM.

Download Raw Diff

Details

Reviewers

sdesmalen
david-arm
paulwalker-arm
efriedma
dtemirbulatov
SjoerdMeijer

Commits

rGd340ef697d90: [AArch64][SVE] Generate smull/umull instead of sve v2i64 mul

Summary

A NEON smull/umull should be preferred over an SVE v2i64 mul with two extends. It will be both fewer instructions and a lower-cost multiply instruction.

Diff Detail

Event Timeline

dmgreen created this revision.Apr 13 2023, 9:34 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 13 2023, 9:34 AM

Herald added subscribers: ctetreau, psnobl, hiraditya and 2 others. · View Herald Transcript

dmgreen requested review of this revision.Apr 13 2023, 9:34 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 13 2023, 9:34 AM

Harbormaster completed remote builds in B225387: Diff 513280.Apr 13 2023, 9:35 AM

Matt added a subscriber: Matt.Apr 13 2023, 12:42 PM

One possible simplification but otherwise looks good.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4580	Can this be just `hasSVE()`? `useSVEForFixedLengthVectorVT()` is used to detect when vectors bigger than 128 bits are supported, and is then sometimes extended to allow NEON-sized vectors. In this instance you don't care about that and have done all the type checking yourself, so you only need to check for the presence of SVE.
llvm/test/CodeGen/AArch64/sve-mull.ll
4–6	I guess you've pre-committed these tests but I think it would have been better to add a `-mattr=+sve` RUN line to `aarch64-smull.ll` and thus ensure other related patterns are not degraded when SVE is enabled.

This revision is now accepted and ready to land.Apr 14 2023, 2:51 AM

This revision was landed with ongoing or failed builds.Apr 26 2023, 2:12 PM

Closed by commit rGd340ef697d90: [AArch64][SVE] Generate smull/umull instead of sve v2i64 mul (authored by dmgreen). · Explain Why

This revision was automatically updated to reflect the committed changes.

dmgreen added a commit: rGd340ef697d90: [AArch64][SVE] Generate smull/umull instead of sve v2i64 mul.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

12 lines

test/

CodeGen/

AArch64/

sve-mull.ll

38 lines

Diff 513280

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,551 Lines • ▼ Show 20 Lines	static unsigned selectUmullSmull(SDNode &N0, SDNode &N1, SelectionDAG &DAG,
}		}
return 0;		return 0;
}		}

SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {		SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();

// If SVE is available then i64 vector multiplications can also be made legal.		// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\|		bool OverrideNEON =
Subtarget->forceStreamingCompatibleSVE();		VT == MVT::v1i64 \|\| Subtarget->forceStreamingCompatibleSVE();

if (VT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(VT, OverrideNEON))		if (VT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

// Multiplications are only custom-lowered for 128-bit vectors so that		// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.		// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
assert(VT.is128BitVector() && VT.isInteger() &&		assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");		"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();		SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();		SDNode *N1 = Op.getOperand(1).getNode();
bool isMLA = false;		bool isMLA = false;
SDLoc DL(Op);		SDLoc DL(Op);
unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);		unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);

if (!NewOpc) {		if (!NewOpc) {
if (VT == MVT::v2i64)		if (VT == MVT::v2i64) {
		// If SVE is available then i64 vector multiplications can also be made
		// legal.
		if (useSVEForFixedLengthVectorVT(VT, true))
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Can this be just `hasSVE()`? `useSVEForFixedLengthVectorVT()` is used to detect when vectors bigger than 128 bits are supported, and is then sometimes extended to allow NEON-sized vectors. In this instance you don't care about that and have done all the type checking yourself, so you only need to check for the presence of SVE. paulwalker-arm: Can this be just `hasSVE()`? `useSVEForFixedLengthVectorVT()` is used to detect when bigger…
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Fall through to expand this. It is not legal.		// Fall through to expand this. It is not legal.
return SDValue();		return SDValue();
else		} else
// Other vector multiplications are legal.		// Other vector multiplications are legal.
return Op;		return Op;
}		}

// Legalize to a S/UMULL instruction		// Legalize to a S/UMULL instruction
SDValue Op0;		SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);		SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {		if (!isMLA) {
▲ Show 20 Lines • Show All 20,084 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-mull.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
	; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sve < %s \| FileCheck %s

	define <2 x i64> @smull_v2i64(<2 x i32> %op1, <2 x i32> %op2) #0 {			define <2 x i64> @smull_v2i64(<2 x i32> %op1, <2 x i32> %op2) #0 {
	; CHECK-LABEL: smull_v2i64:			; CHECK-LABEL: smull_v2i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
				paulwalker-armUnsubmitted Not Done Reply Inline Actions I guess you've pre-committed these tests but I think it would have been better to add a `-mattr=+sve` RUN line to `aarch64-smull.ll` and thus ensure other related patterns are not degraded when SVE is enabled. paulwalker-arm: I guess you've pre-committed these tests but I think it would have been better to add a `…
	; CHECK-NEXT: sshll v0.2d, v0.2s, #0			; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
	; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: sshll v1.2d, v1.2s, #0
	; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
	; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a = sext <2 x i32> %op1 to <2 x i64>			%a = sext <2 x i32> %op1 to <2 x i64>
	%b = sext <2 x i32> %op2 to <2 x i64>			%b = sext <2 x i32> %op2 to <2 x i64>
	%res = mul <2 x i64> %a, %b			%res = mul <2 x i64> %a, %b
	ret <2 x i64> %res			ret <2 x i64> %res
	}			}

	define <2 x i64> @umull_v2i64(<2 x i32> %op1, <2 x i32> %op2) #0 {			define <2 x i64> @umull_v2i64(<2 x i32> %op1, <2 x i32> %op2) #0 {
	; CHECK-LABEL: umull_v2i64:			; CHECK-LABEL: umull_v2i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ushll v0.2d, v0.2s, #0			; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
	; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: ushll v1.2d, v1.2s, #0
	; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
	; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a = zext <2 x i32> %op1 to <2 x i64>			%a = zext <2 x i32> %op1 to <2 x i64>
	%b = zext <2 x i32> %op2 to <2 x i64>			%b = zext <2 x i32> %op2 to <2 x i64>
	%res = mul <2 x i64> %a, %b			%res = mul <2 x i64> %a, %b
	ret <2 x i64> %res			ret <2 x i64> %res
	}			}

	define <4 x i64> @smull_v4i64(<4 x i32> %op1, <4 x i32> %op2) #0 {			define <4 x i64> @smull_v4i64(<4 x i32> %op1, <4 x i32> %op2) #0 {
	; CHECK-LABEL: smull_v4i64:			; CHECK-LABEL: smull_v4i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: sshll v2.2d, v0.2s, #0			; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
	; CHECK-NEXT: ptrue p0.d, vl2			; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
	; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0			; CHECK-NEXT: mov v1.16b, v2.16b
	; CHECK-NEXT: sshll v3.2d, v1.2s, #0
	; CHECK-NEXT: sshll2 v1.2d, v1.4s, #0
	; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
	; CHECK-NEXT: movprfx z0, z2
	; CHECK-NEXT: mul z0.d, p0/m, z0.d, z3.d
	; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
	; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a = sext <4 x i32> %op1 to <4 x i64>			%a = sext <4 x i32> %op1 to <4 x i64>
	%b = sext <4 x i32> %op2 to <4 x i64>			%b = sext <4 x i32> %op2 to <4 x i64>
	%res = mul <4 x i64> %a, %b			%res = mul <4 x i64> %a, %b
	ret <4 x i64> %res			ret <4 x i64> %res
	}			}

	define <4 x i64> @umull_v4i64(<4 x i32> %op1, <4 x i32> %op2) #0 {			define <4 x i64> @umull_v4i64(<4 x i32> %op1, <4 x i32> %op2) #0 {
	; CHECK-LABEL: umull_v4i64:			; CHECK-LABEL: umull_v4i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ushll v2.2d, v0.2s, #0			; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
	; CHECK-NEXT: ptrue p0.d, vl2			; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
	; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0			; CHECK-NEXT: mov v1.16b, v2.16b
	; CHECK-NEXT: ushll v3.2d, v1.2s, #0
	; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
	; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
	; CHECK-NEXT: movprfx z0, z2
	; CHECK-NEXT: mul z0.d, p0/m, z0.d, z3.d
	; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
	; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a = zext <4 x i32> %op1 to <4 x i64>			%a = zext <4 x i32> %op1 to <4 x i64>
	%b = zext <4 x i32> %op2 to <4 x i64>			%b = zext <4 x i32> %op2 to <4 x i64>
	%res = mul <4 x i64> %a, %b			%res = mul <4 x i64> %a, %b
	ret <4 x i64> %res			ret <4 x i64> %res
	}			}