This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
1
AArch64ISelLowering.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
arm64-vmul.ll
-
reassocmls.ll

Differential D143143

[AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2)
ClosedPublic

Authored by dmgreen on Feb 2 2023, 12:41 AM.

Download Raw Diff

Details

Reviewers

SjoerdMeijer
samtebbs
fhahn
labrinea

Commits

rGe9eaee9da196: [AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2)
rGc52255d26a23: [AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2)

Summary

The mid end will reassociate sub(sub(x, m1), m2) to sub(x, add(m1, m2)). This reassociates it back to allow the creation of more mls instructions.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

dmgreen created this revision. · Feb 2 2023, 12:41 AM

Herald added a project: Restricted Project. · View Herald Transcript · Feb 2 2023, 12:41 AM

Herald added subscribers: StephenFan, hiraditya, kristof.beyls. · View Herald Transcript

dmgreen requested review of this revision. · Feb 2 2023, 12:41 AM

Herald added a project: Restricted Project. · View Herald Transcript · Feb 2 2023, 12:41 AM

Harbormaster completed remote builds in B211402: Diff 494188. · Feb 2 2023, 12:42 AM

Ping

The patch looks good to me, but I was just wondering if another approach would be to just match the sub(x, add(m1, m2)) pattern as mls, or is this easier/better?

In D143143#4114749, @SjoerdMeijer wrote:

The patch looks good to me, but I was just wondering if another approach would be to just match the sub(x, add(m1, m2)) pattern as mls, or is this easier/better?

Thanks for taking a look. That might be an option, but it would need to match sub(x, add(mul(a,b), mul(c,d))) to two nested msubs, msub(msub(x, a, b), c, d), for all the different types of mls. I think it is probably simpler to go the un-reassociate route.

LGTM, thanks! Given @dmgreen's point about the number of patterns that would be required the current patch seems like a more general solution.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
17717	It might be good to use the same names here as in the comment above, i.e. `m1` and `m2`?

This revision is now accepted and ready to land. · Feb 9 2023, 3:35 AM

Yeah, makes sense, cheers. LGTM too

This revision was landed with ongoing or failed builds. · Feb 10 2023, 10:09 AM

Closed by commit rGc52255d26a23: [AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2) (authored by dmgreen). · Explain Why

This revision was automatically updated to reflect the committed changes.

dmgreen added a commit: rGc52255d26a23: [AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2).

This caused hangs when compiling some source files from ffmpeg, libvpx and libaom. One repro is available at https://martin.st/temp/adxenc.c, triggered with clang -target aarch64-w64-mingw32 -c -O2 adxenc.c -w.

I’ll push a revert soon.

mstorsjo added a reverting change: rG7717e1114ad3: Revert "[AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2)". · Feb 12 2023, 6:44 AM

Thanks for the report. I had somehow missed vector constants even though scalars were being correctly handled. I thought I had added tests for them but apparently not.

dmgreen added a commit: rGe9eaee9da196: [AArch64] Reassociate sub(x, add(m1, m2)) to sub(sub(x, m1), m2). · Feb 13 2023, 6:35 AM

dmgreen mentioned this in rG8a7b5e0e50de: [AArch64] Guard extra uses in mls combine. · Feb 15 2023, 10:37 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

28 lines

test/

CodeGen/

AArch64/

arm64-vmul.ll

40 lines

reassocmls.ll

52 lines

Diff 496539

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 17,696 Lines • ▼ Show 20 Lines	static SDValue performAddCombineForShiftedOperands(SDNode *N,
if (isOpcWithIntImmediate(LHS.getNode(), ISD::SHL, LHSImm) &&		if (isOpcWithIntImmediate(LHS.getNode(), ISD::SHL, LHSImm) &&
isOpcWithIntImmediate(RHS.getNode(), ISD::SHL, RHSImm) && LHSImm <= 4 &&		isOpcWithIntImmediate(RHS.getNode(), ISD::SHL, RHSImm) && LHSImm <= 4 &&
RHSImm > 4 && LHS.hasOneUse())		RHSImm > 4 && LHS.hasOneUse())
return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);		return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);

return SDValue();		return SDValue();
}		}

		// The mid end will reassociate sub(sub(x, m1), m2) to sub(x, add(m1, m2)).
		// This reassociates it back to allow the creation of more mls instructions.
		//
		// N is the node being combined. Returns sub(sub(x, m1), m2) when N is
		// sub(x, add(m1, m2)) with both m1 and m2 being multiplies (ISD::MUL,
		// AArch64ISD::SMULL or AArch64ISD::UMULL), and an empty SDValue otherwise.
		static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG) {
		  if (N->getOpcode() != ISD::SUB)
		    return SDValue();

		  SDValue Add = N->getOperand(1);
		  if (Add.getOpcode() != ISD::ADD)
		    return SDValue();
		  // If the add has other users it stays live after the rewrite, so splitting
		  // it here would only introduce an extra sub on top of the existing add.
		  if (!Add.hasOneUse())
		    return SDValue();

		  SDValue X = N->getOperand(0);
		  // Skip constant x, whether scalar or a constant vector (possibly behind a
		  // bitcast). Guarding only scalar constants led to compile-time hangs on
		  // vector constants; presumably other combines fold the split form back
		  // into sub(C, add(m1, m2)), so the two rewrites would ping-pong forever.
		  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(X)))
		    return SDValue();

		  SDValue M1 = Add.getOperand(0);
		  SDValue M2 = Add.getOperand(1);
		  // Only multiplies are worth splitting out: each one can pair with its own
		  // sub to form a multiply-subtract.
		  auto IsMul = [](SDValue V) {
		    unsigned Opc = V.getOpcode();
		    return Opc == ISD::MUL || Opc == AArch64ISD::SMULL ||
		           Opc == AArch64ISD::UMULL;
		  };
		  if (!IsMul(M1) || !IsMul(M2))
		    return SDValue();

		  SDLoc DL(N);
		  EVT VT = N->getValueType(0);
		  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, M1);
		  return DAG.getNode(ISD::SUB, DL, VT, Sub, M2);
		}

static SDValue performAddSubCombine(SDNode *N,		static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
// Try to change sum of two reductions.		// Try to change sum of two reductions.
if (SDValue Val = performAddUADDVCombine(N, DAG))		if (SDValue Val = performAddUADDVCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performAddDotCombine(N, DAG))		if (SDValue Val = performAddDotCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performAddCSelIntoCSinc(N, DAG))		if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
return Val;		return Val;
if (SDValue Val = performNegCSelCombine(N, DAG))		if (SDValue Val = performNegCSelCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performVectorAddSubExtCombine(N, DAG))		if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performAddCombineForShiftedOperands(N, DAG))		if (SDValue Val = performAddCombineForShiftedOperands(N, DAG))
return Val;		return Val;
		if (SDValue Val = performSubAddMULCombine(N, DAG))
		return Val;

return performAddSubLongCombine(N, DCI, DAG);		return performAddSubLongCombine(N, DCI, DAG);
}		}

// Massage DAGs which we can use the high-half "long" operations on into		// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.		// something isel will recognize better. E.g.
//		//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->		// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
▲ Show 20 Lines • Show All 6,608 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Show First 20 Lines • Show All 451 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)		%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = sub <2 x i64> %tmp3, %tmp4		%tmp5 = sub <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5		ret <2 x i64> %tmp5
}		}

define void @smlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {		define void @smlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: smlsl8h_chain_with_constant:		; CHECK-LABEL: smlsl8h_chain_with_constant:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: smull.8h v0, v0, v2
; CHECK-NEXT: mvn.8b v2, v2
; CHECK-NEXT: movi.16b v3, #1		; CHECK-NEXT: movi.16b v3, #1
; CHECK-NEXT: smlal.8h v0, v1, v2		; CHECK-NEXT: smlsl.8h v3, v0, v2
; CHECK-NEXT: sub.8h v0, v3, v0		; CHECK-NEXT: mvn.8b v0, v2
; CHECK-NEXT: str q0, [x0]		; CHECK-NEXT: smlsl.8h v3, v1, v0
		; CHECK-NEXT: str q3, [x0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>		%xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3)		%smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3)
%sub.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %smull.1		%sub.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %smull.1
%smull.2 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v2, <8 x i8> %xor)		%smull.2 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v2, <8 x i8> %xor)
%sub.2 = sub <8 x i16> %sub.1, %smull.2		%sub.2 = sub <8 x i16> %sub.1, %smull.2
store <8 x i16> %sub.2, ptr %dst		store <8 x i16> %sub.2, ptr %dst
ret void		ret void
}		}

define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {		define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: smlsl2d_chain_with_constant:		; CHECK-LABEL: smlsl2d_chain_with_constant:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: smull.2d v0, v0, v2
; CHECK-NEXT: mov w8, #257		; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mvn.8b v2, v2		; CHECK-NEXT: dup.2d v3, x8
; CHECK-NEXT: smlal.2d v0, v1, v2		; CHECK-NEXT: smlsl.2d v3, v0, v2
; CHECK-NEXT: dup.2d v1, x8		; CHECK-NEXT: mvn.8b v0, v2
; CHECK-NEXT: sub.2d v0, v1, v0		; CHECK-NEXT: smlsl.2d v3, v1, v0
; CHECK-NEXT: str q0, [x0]		; CHECK-NEXT: str q3, [x0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%xor = xor <2 x i32> %v3, <i32 -1, i32 -1>		%xor = xor <2 x i32> %v3, <i32 -1, i32 -1>
%smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3)		%smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3)
%sub.1 = sub <2 x i64> <i64 257, i64 257>, %smull.1		%sub.1 = sub <2 x i64> <i64 257, i64 257>, %smull.1
%smull.2 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v2, <2 x i32> %xor)		%smull.2 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v2, <2 x i32> %xor)
%sub.2 = sub <2 x i64> %sub.1, %smull.2		%sub.2 = sub <2 x i64> %sub.1, %smull.2
store <2 x i64> %sub.2, ptr %dst		store <2 x i64> %sub.2, ptr %dst
ret void		ret void
▲ Show 20 Lines • Show All 239 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)		%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = sub <2 x i64> %tmp3, %tmp4		%tmp5 = sub <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5		ret <2 x i64> %tmp5
}		}

define void @umlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {		define void @umlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: umlsl8h_chain_with_constant:		; CHECK-LABEL: umlsl8h_chain_with_constant:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: umull.8h v0, v0, v2
; CHECK-NEXT: mvn.8b v2, v2
; CHECK-NEXT: movi.16b v3, #1		; CHECK-NEXT: movi.16b v3, #1
; CHECK-NEXT: umlal.8h v0, v1, v2		; CHECK-NEXT: umlsl.8h v3, v0, v2
; CHECK-NEXT: sub.8h v0, v3, v0		; CHECK-NEXT: mvn.8b v0, v2
; CHECK-NEXT: str q0, [x0]		; CHECK-NEXT: umlsl.8h v3, v1, v0
		; CHECK-NEXT: str q3, [x0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>		%xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3)		%umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3)
%add.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %umull.1		%add.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %umull.1
%umull.2 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v2, <8 x i8> %xor)		%umull.2 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v2, <8 x i8> %xor)
%add.2 = sub <8 x i16> %add.1, %umull.2		%add.2 = sub <8 x i16> %add.1, %umull.2
store <8 x i16> %add.2, ptr %dst		store <8 x i16> %add.2, ptr %dst
ret void		ret void
}		}

define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {		define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: umlsl2d_chain_with_constant:		; CHECK-LABEL: umlsl2d_chain_with_constant:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: umull.2d v0, v0, v2
; CHECK-NEXT: mov w8, #257		; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mvn.8b v2, v2		; CHECK-NEXT: dup.2d v3, x8
; CHECK-NEXT: umlal.2d v0, v1, v2		; CHECK-NEXT: umlsl.2d v3, v0, v2
; CHECK-NEXT: dup.2d v1, x8		; CHECK-NEXT: mvn.8b v0, v2
; CHECK-NEXT: sub.2d v0, v1, v0		; CHECK-NEXT: umlsl.2d v3, v1, v0
; CHECK-NEXT: str q0, [x0]		; CHECK-NEXT: str q3, [x0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%xor = xor <2 x i32> %v3, <i32 -1, i32 -1>		%xor = xor <2 x i32> %v3, <i32 -1, i32 -1>
%umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3)		%umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3)
%add.1 = sub <2 x i64> <i64 257, i64 257>, %umull.1		%add.1 = sub <2 x i64> <i64 257, i64 257>, %umull.1
%umull.2 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v2, <2 x i32> %xor)		%umull.2 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v2, <2 x i32> %xor)
%add.2 = sub <2 x i64> %add.1, %umull.2		%add.2 = sub <2 x i64> %add.1, %umull.2
store <2 x i64> %add.2, ptr %dst		store <2 x i64> %add.2, ptr %dst
ret void		ret void
▲ Show 20 Lines • Show All 2,095 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/reassocmls.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+sve2 | FileCheck %s		; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+sve2 | FileCheck %s

define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {		define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: smlsl_i64:		; CHECK-LABEL: smlsl_i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: smull x8, w4, w3		; CHECK-NEXT: smsubl x8, w4, w3, x0
; CHECK-NEXT: smaddl x8, w2, w1, x8		; CHECK-NEXT: smsubl x0, w2, w1, x8
; CHECK-NEXT: sub x0, x0, x8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = sext i32 %b to i64		%be = sext i32 %b to i64
%ce = sext i32 %c to i64		%ce = sext i32 %c to i64
%de = sext i32 %d to i64		%de = sext i32 %d to i64
%ee = sext i32 %e to i64		%ee = sext i32 %e to i64
%m1.neg = mul nsw i64 %ce, %be		%m1.neg = mul nsw i64 %ce, %be
%m2.neg = mul nsw i64 %ee, %de		%m2.neg = mul nsw i64 %ee, %de
%reass.add = add i64 %m2.neg, %m1.neg		%reass.add = add i64 %m2.neg, %m1.neg
%s2 = sub i64 %a, %reass.add		%s2 = sub i64 %a, %reass.add
ret i64 %s2		ret i64 %s2
}		}

define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {		define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: umlsl_i64:		; CHECK-LABEL: umlsl_i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: umull x8, w4, w3		; CHECK-NEXT: umsubl x8, w4, w3, x0
; CHECK-NEXT: umaddl x8, w2, w1, x8		; CHECK-NEXT: umsubl x0, w2, w1, x8
; CHECK-NEXT: sub x0, x0, x8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = zext i32 %b to i64		%be = zext i32 %b to i64
%ce = zext i32 %c to i64		%ce = zext i32 %c to i64
%de = zext i32 %d to i64		%de = zext i32 %d to i64
%ee = zext i32 %e to i64		%ee = zext i32 %e to i64
%m1.neg = mul nuw i64 %ce, %be		%m1.neg = mul nuw i64 %ce, %be
%m2.neg = mul nuw i64 %ee, %de		%m2.neg = mul nuw i64 %ee, %de
%reass.add = add i64 %m2.neg, %m1.neg		%reass.add = add i64 %m2.neg, %m1.neg
%s2 = sub i64 %a, %reass.add		%s2 = sub i64 %a, %reass.add
ret i64 %s2		ret i64 %s2
}		}

define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {		define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mls_i64:		; CHECK-LABEL: mls_i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: mul x8, x2, x1		; CHECK-NEXT: msub x8, x4, x3, x0
; CHECK-NEXT: madd x8, x4, x3, x8		; CHECK-NEXT: msub x0, x2, x1, x8
; CHECK-NEXT: sub x0, x0, x8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%m1.neg = mul i64 %c, %b		%m1.neg = mul i64 %c, %b
%m2.neg = mul i64 %e, %d		%m2.neg = mul i64 %e, %d
%reass.add = add i64 %m2.neg, %m1.neg		%reass.add = add i64 %m2.neg, %m1.neg
%s2 = sub i64 %a, %reass.add		%s2 = sub i64 %a, %reass.add
ret i64 %s2		ret i64 %s2
}		}

define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {		define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {
; CHECK-LABEL: mls_i16:		; CHECK-LABEL: mls_i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: mul w8, w2, w1		; CHECK-NEXT: msub w8, w4, w3, w0
; CHECK-NEXT: madd w8, w4, w3, w8		; CHECK-NEXT: msub w0, w2, w1, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%m1.neg = mul i16 %c, %b		%m1.neg = mul i16 %c, %b
%m2.neg = mul i16 %e, %d		%m2.neg = mul i16 %e, %d
%reass.add = add i16 %m2.neg, %m1.neg		%reass.add = add i16 %m2.neg, %m1.neg
%s2 = sub i16 %a, %reass.add		%s2 = sub i16 %a, %reass.add
ret i16 %s2		ret i16 %s2
}		}

Show All 24 Lines	; CHECK-NEXT: ret
%s2 = sub i64 10, %reass.add		%s2 = sub i64 10, %reass.add
ret i64 %s2		ret i64 %s2
}		}


define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {		define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: smlsl_v8i16:		; CHECK-LABEL: smlsl_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: smull v3.8h, v4.8b, v3.8b		; CHECK-NEXT: smlsl v0.8h, v4.8b, v3.8b
; CHECK-NEXT: smlal v3.8h, v2.8b, v1.8b		; CHECK-NEXT: smlsl v0.8h, v2.8b, v1.8b
; CHECK-NEXT: sub v0.8h, v0.8h, v3.8h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = sext <8 x i8> %b to <8 x i16>		%be = sext <8 x i8> %b to <8 x i16>
%ce = sext <8 x i8> %c to <8 x i16>		%ce = sext <8 x i8> %c to <8 x i16>
%de = sext <8 x i8> %d to <8 x i16>		%de = sext <8 x i8> %d to <8 x i16>
%ee = sext <8 x i8> %e to <8 x i16>		%ee = sext <8 x i8> %e to <8 x i16>
%m1.neg = mul nsw <8 x i16> %ce, %be		%m1.neg = mul nsw <8 x i16> %ce, %be
%m2.neg = mul nsw <8 x i16> %ee, %de		%m2.neg = mul nsw <8 x i16> %ee, %de
%reass.add = add <8 x i16> %m2.neg, %m1.neg		%reass.add = add <8 x i16> %m2.neg, %m1.neg
%s2 = sub <8 x i16> %a, %reass.add		%s2 = sub <8 x i16> %a, %reass.add
ret <8 x i16> %s2		ret <8 x i16> %s2
}		}

define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {		define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: umlsl_v8i16:		; CHECK-LABEL: umlsl_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: umull v3.8h, v4.8b, v3.8b		; CHECK-NEXT: umlsl v0.8h, v4.8b, v3.8b
; CHECK-NEXT: umlal v3.8h, v2.8b, v1.8b		; CHECK-NEXT: umlsl v0.8h, v2.8b, v1.8b
; CHECK-NEXT: sub v0.8h, v0.8h, v3.8h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = zext <8 x i8> %b to <8 x i16>		%be = zext <8 x i8> %b to <8 x i16>
%ce = zext <8 x i8> %c to <8 x i16>		%ce = zext <8 x i8> %c to <8 x i16>
%de = zext <8 x i8> %d to <8 x i16>		%de = zext <8 x i8> %d to <8 x i16>
%ee = zext <8 x i8> %e to <8 x i16>		%ee = zext <8 x i8> %e to <8 x i16>
%m1.neg = mul nuw <8 x i16> %ce, %be		%m1.neg = mul nuw <8 x i16> %ce, %be
%m2.neg = mul nuw <8 x i16> %ee, %de		%m2.neg = mul nuw <8 x i16> %ee, %de
%reass.add = add <8 x i16> %m2.neg, %m1.neg		%reass.add = add <8 x i16> %m2.neg, %m1.neg
%s2 = sub <8 x i16> %a, %reass.add		%s2 = sub <8 x i16> %a, %reass.add
ret <8 x i16> %s2		ret <8 x i16> %s2
}		}

define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {		define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: mls_v8i16:		; CHECK-LABEL: mls_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: mul v1.8h, v2.8h, v1.8h		; CHECK-NEXT: mls v0.8h, v4.8h, v3.8h
; CHECK-NEXT: mla v1.8h, v4.8h, v3.8h		; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%m1.neg = mul <8 x i16> %c, %b		%m1.neg = mul <8 x i16> %c, %b
%m2.neg = mul <8 x i16> %e, %d		%m2.neg = mul <8 x i16> %e, %d
%reass.add = add <8 x i16> %m2.neg, %m1.neg		%reass.add = add <8 x i16> %m2.neg, %m1.neg
%s2 = sub <8 x i16> %a, %reass.add		%s2 = sub <8 x i16> %a, %reass.add
ret <8 x i16> %s2		ret <8 x i16> %s2
}		}

Show All 14 Lines
define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {		define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
; CHECK-LABEL: smlsl_nxv8i16:		; CHECK-LABEL: smlsl_nxv8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h		; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sxtb z3.h, p0/m, z3.h		; CHECK-NEXT: sxtb z3.h, p0/m, z3.h
; CHECK-NEXT: sxtb z4.h, p0/m, z4.h		; CHECK-NEXT: sxtb z4.h, p0/m, z4.h
; CHECK-NEXT: sxtb z1.h, p0/m, z1.h		; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
; CHECK-NEXT: sxtb z2.h, p0/m, z2.h		; CHECK-NEXT: sxtb z2.h, p0/m, z2.h
; CHECK-NEXT: mul z3.h, z4.h, z3.h		; CHECK-NEXT: mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT: mla z3.h, p0/m, z2.h, z1.h		; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: sub z0.h, z0.h, z3.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>		%be = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
%ce = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>		%ce = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
%de = sext <vscale x 8 x i8> %d to <vscale x 8 x i16>		%de = sext <vscale x 8 x i8> %d to <vscale x 8 x i16>
%ee = sext <vscale x 8 x i8> %e to <vscale x 8 x i16>		%ee = sext <vscale x 8 x i8> %e to <vscale x 8 x i16>
%m1.neg = mul nsw <vscale x 8 x i16> %ce, %be		%m1.neg = mul nsw <vscale x 8 x i16> %ce, %be
%m2.neg = mul nsw <vscale x 8 x i16> %ee, %de		%m2.neg = mul nsw <vscale x 8 x i16> %ee, %de
%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg		%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
%s2 = sub <vscale x 8 x i16> %a, %reass.add		%s2 = sub <vscale x 8 x i16> %a, %reass.add
ret <vscale x 8 x i16> %s2		ret <vscale x 8 x i16> %s2
}		}

define <vscale x 8 x i16> @umlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {		define <vscale x 8 x i16> @umlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
; CHECK-LABEL: umlsl_nxv8i16:		; CHECK-LABEL: umlsl_nxv8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
		; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z3.h, z3.h, #0xff		; CHECK-NEXT: and z3.h, z3.h, #0xff
; CHECK-NEXT: and z4.h, z4.h, #0xff		; CHECK-NEXT: and z4.h, z4.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff		; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z2.h, z2.h, #0xff		; CHECK-NEXT: and z2.h, z2.h, #0xff
; CHECK-NEXT: mul z3.h, z4.h, z3.h		; CHECK-NEXT: mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT: mla z3.h, p0/m, z2.h, z1.h		; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: sub z0.h, z0.h, z3.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%be = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>		%be = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
%ce = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>		%ce = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
%de = zext <vscale x 8 x i8> %d to <vscale x 8 x i16>		%de = zext <vscale x 8 x i8> %d to <vscale x 8 x i16>
%ee = zext <vscale x 8 x i8> %e to <vscale x 8 x i16>		%ee = zext <vscale x 8 x i8> %e to <vscale x 8 x i16>
%m1.neg = mul nuw <vscale x 8 x i16> %ce, %be		%m1.neg = mul nuw <vscale x 8 x i16> %ce, %be
%m2.neg = mul nuw <vscale x 8 x i16> %ee, %de		%m2.neg = mul nuw <vscale x 8 x i16> %ee, %de
%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg		%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
%s2 = sub <vscale x 8 x i16> %a, %reass.add		%s2 = sub <vscale x 8 x i16> %a, %reass.add
ret <vscale x 8 x i16> %s2		ret <vscale x 8 x i16> %s2
}		}

define <vscale x 8 x i16> @mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {		define <vscale x 8 x i16> @mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
; CHECK-LABEL: mls_nxv8i16:		; CHECK-LABEL: mls_nxv8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h		; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mul z3.h, z4.h, z3.h		; CHECK-NEXT: mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT: mla z3.h, p0/m, z2.h, z1.h		; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: sub z0.h, z0.h, z3.h
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%m1.neg = mul <vscale x 8 x i16> %c, %b		%m1.neg = mul <vscale x 8 x i16> %c, %b
%m2.neg = mul <vscale x 8 x i16> %e, %d		%m2.neg = mul <vscale x 8 x i16> %e, %d
%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg		%reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
%s2 = sub <vscale x 8 x i16> %a, %reass.add		%s2 = sub <vscale x 8 x i16> %a, %reass.add
ret <vscale x 8 x i16> %s2		ret <vscale x 8 x i16> %s2
}		}

Show All 13 Lines