This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
-
AArch64ISelLowering.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
aarch64-addv.ll
-
vecreduce-umax-legalization.ll

Differential D88161

[AArch64] No NEON instructions to support vector of i64 reductions
ClosedPublic

Authored by cameron.mcinally on Sep 23 2020, 9:30 AM.

Download Raw Diff

Details

Reviewers

aemerson
paulwalker-arm
efriedma
t.p.northover

Commits

rGe8413ac97f6c: [AArch64] Expand some vector of i64 reductions on NEON

Summary

There are no NEON instructions to support vector of i64 reductions.

It could be possible to support v2i64 ISD::VECREDUCE_ADD with AArch64ISD::UADDLV, but that's probably better left for a separate patch.

Does this change need an XFAIL test? They're not currently tested, so wasn't sure on the protocol for that.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

cameron.mcinally created this revision.Sep 23 2020, 9:30 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 23 2020, 9:30 AM

Herald added subscribers: llvm-commits, danielkiss, hiraditya, kristof.beyls. · View Herald Transcript

cameron.mcinally requested review of this revision.Sep 23 2020, 9:30 AM

Harbormaster completed remote builds in B72678: Diff 293768.Sep 23 2020, 10:12 AM

It appears lowering for v2i64 VECREDUCE_ADD currently works correctly; it produces an addp. It appears v2i64 VECREDUCE_SMAX is currently broken. Does this patch change either of those?

You're right about v2i64 UADDV. The existing test didn't catch my regression, so I added a new CHECK line to that test. I also added a line to Custom-lower v2i64 VECREDUCE_ADD, as before.

v2i64 *MAXV currently gives a Cannot select. That makes sense since *MAXP also doesn't support i64.

v2i64 *MAXV currently gives a Cannot select. That makes sense since *MAXP also doesn't support i64.

What happens if someone tries to use llvm.experimental.vector.reduce.smax.v2i64 with this patch? I guess this change stops us from generating MAXV, but what happens instead? Can we write a test for that?

How about something like this? Not really exhaustive coverage, but it matches the current legalization tests for the reduce intrinsics.

LGTM

This revision is now accepted and ready to land.Sep 23 2020, 1:10 PM

Closed by commit rGe8413ac97f6c: [AArch64] Expand some vector of i64 reductions on NEON (authored by cameron.mcinally). · Explain WhySep 23 2020, 2:01 PM

This revision was automatically updated to reflect the committed changes.

cameron.mcinally added a commit: rGe8413ac97f6c: [AArch64] Expand some vector of i64 reductions on NEON.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

20 lines

test/

CodeGen/

AArch64/

aarch64-addv.ll

1 line

vecreduce-umax-legalization.ll

14 lines

Diff 293858

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 883 Lines • ▼ Show 20 Lines	if (Subtarget->hasNEON()) {

// AArch64 doesn't have MUL.2d:		// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);		setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.		// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);		setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);		setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);		setOperationAction(ISD::MUL, MVT::v2i64, Custom);

		// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,		for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {		MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

// Saturates
setOperationAction(ISD::SADDSAT, VT, Legal);		setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);		setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);		setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);		setOperationAction(ISD::USUBSAT, VT, Legal);
}		}

		// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,		for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {		MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
}		}
		for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
		MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
		setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
		}
		setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);

setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);		setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);		setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled		// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.		// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {		for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);		setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

▲ Show 20 Lines • Show All 15,123 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/aarch64-addv.ll

Show All 27 Lines	; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
%bin.rdx = load <4 x i32>, <4 x i32>* %arr		%bin.rdx = load <4 x i32>, <4 x i32>* %arr
%r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)		%r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
ret i32 %r		ret i32 %r
}		}

define i64 @add_D(<2 x i64>* %arr) {		define i64 @add_D(<2 x i64>* %arr) {
; CHECK-LABEL: add_D		; CHECK-LABEL: add_D
; CHECK-NOT: addv		; CHECK-NOT: addv
		; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
%bin.rdx = load <2 x i64>, <2 x i64>* %arr		%bin.rdx = load <2 x i64>, <2 x i64>* %arr
%r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %bin.rdx)		%r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %bin.rdx)
ret i64 %r		ret i64 %r
}		}

declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)		declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)

define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {		define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
Show All 26 Lines

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s --check-prefix=CHECK			; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon \| FileCheck %s --check-prefix=CHECK

	declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)			declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
	declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)			declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
	declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)			declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
	declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)			declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
	declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)			declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
	declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)			declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
	declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)			declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)

				declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
	declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)			declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
	declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)			declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
	declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)			declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
	declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)			declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
	declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)			declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
	declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)			declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
	declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)			declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)

	▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines
	define i128 @test_v1i128(<1 x i128> %a) nounwind {			define i128 @test_v1i128(<1 x i128> %a) nounwind {
	; CHECK-LABEL: test_v1i128:			; CHECK-LABEL: test_v1i128:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)			%b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
	ret i128 %b			ret i128 %b
	}			}

				; No i64 vector support for UMAX.
				define i64 @test_v2i64(<2 x i64> %a) nounwind {
				; CHECK-LABEL: test_v2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov x8, v0.d[1]
				; CHECK-NEXT: fmov x9, d0
				; CHECK-NEXT: cmp x9, x8
				; CHECK-NEXT: csel x0, x9, x8, hi
				; CHECK-NEXT: ret
				%b = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
				ret i64 %b
				}

	define i8 @test_v3i8(<3 x i8> %a) nounwind {			define i8 @test_v3i8(<3 x i8> %a) nounwind {
	; CHECK-LABEL: test_v3i8:			; CHECK-LABEL: test_v3i8:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: movi d0, #0000000000000000			; CHECK-NEXT: movi d0, #0000000000000000
	; CHECK-NEXT: mov v0.h[0], w0			; CHECK-NEXT: mov v0.h[0], w0
	; CHECK-NEXT: mov v0.h[1], w1			; CHECK-NEXT: mov v0.h[1], w1
	; CHECK-NEXT: mov v0.h[2], w2			; CHECK-NEXT: mov v0.h[2], w2
	; CHECK-NEXT: bic v0.4h, #255, lsl #8			; CHECK-NEXT: bic v0.4h, #255, lsl #8
	▲ Show 20 Lines • Show All 85 Lines • Show Last 20 Lines