This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
4/8
AArch64ISelLowering.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
concat-vector.ll
1/2
neon-widen-shuffle.ll
-
sve-fixed-length-concat.ll

Differential D111619

[AArch64] Improve shuffle vector by using wider types
ClosedPublic

Authored by wwei on Oct 12 2021, 1:46 AM.

Download Raw Diff

Details

Reviewers

dmgreen
t.p.northover
efriedma
paulwalker-arm
sdesmalen
david-arm
fhahn

Commits

rGf5056c8c16bb: [AArch64] Improve shuffle vector by using wider types

Summary

Try to widen the element type to get a new mask value for a better permutation
sequence, so that we can use NEON shuffle instructions such as ZIP1/2,
UZP1/2, TRN1/2, REV, INS, etc.
For example:

`shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>`

is equivalent to:

`shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>`

Finally, we can get:

`mov     v0.d[0], v1.d[1]`

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

wwei created this revision.Oct 12 2021, 1:46 AM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald TranscriptOct 12 2021, 1:46 AM

wwei requested review of this revision.Oct 12 2021, 1:46 AM

Herald added a subscriber: llvm-commits. · View Herald TranscriptOct 12 2021, 1:46 AM

Harbormaster completed remote builds in B128297: Diff 378919.Oct 12 2021, 2:31 AM

Hello. This sounds like a nice idea.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
9580	Perhaps add a comment explaining the function.
9632	Can this happen?
9639	`> 32` is probably enough of a check (and != 1 is a good check too). If we are combining adjacent elements, the most we can combine are two i32's into an i64. I think it's the same thing due to legal types, but is a little more clear. The comment above could do with being reworded to be clearer too.
9646	Perhaps make a variable for ScalarVT.getFixedSizeInBits() (or VT.getScalarSizeInBits() which I think should be the same thing)
llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
2	Can you use the update_llc_test_checks script?

wwei updated this revision to Diff 379352.Oct 13 2021, 5:34 AM

wwei added inline comments.Oct 13 2021, 5:37 AM

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
9580	added
9632	removed
9639	update the comment
9646	add a new variable `unsigned ElementSize`
llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
2	updated.

Harbormaster completed remote builds in B128593: Diff 379352.Oct 13 2021, 6:22 AM

Thanks. LGTM

This revision is now accepted and ready to land.Oct 14 2021, 12:24 PM

Closed by commit rGf5056c8c16bb: [AArch64] Improve shuffle vector by using wider types (authored by wwei). · Explain WhyOct 18 2021, 6:25 AM

This revision was automatically updated to reflect the committed changes.

wwei added a commit: rGf5056c8c16bb: [AArch64] Improve shuffle vector by using wider types.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

83 lines

test/

CodeGen/

AArch64/

concat-vector.ll

3 lines

neon-widen-shuffle.ll

179 lines

sve-fixed-length-concat.ll

3 lines

Diff 380367

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 9,571 Lines • ▼ Show 20 Lines	if (getScaledOffsetDup(V, Lane, CastVT)) {
V = WidenVector(V.getOperand(Idx), DAG);		V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {		} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.		// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);		V = WidenVector(V, DAG);
}		}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));		return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}		}

		// Return true if we can get a new shuffle mask by checking the parameter mask
		dmgreenUnsubmitted Not Done Reply Inline Actions Perhaps add a comment explaining the function. dmgreen: Perhaps add a comment explaining the function.
		wweiAuthorUnsubmitted Done Reply Inline Actions added wwei: added
		// array to test whether each pair of adjacent mask values is consecutive and
		// starts at an even value.
		// \p M is the shuffle mask for vector type \p VT. On success, \p NewMask
		// receives one index per adjacent pair of \p M (-1 when both halves are
		// undef); on any failure it is left empty.
		static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
		                           SmallVectorImpl<int> &NewMask) {
		  // Reset the output first so every failure path returns an empty mask.
		  NewMask.clear();

		  unsigned NumElts = VT.getVectorNumElements();
		  // An odd element count cannot be split into adjacent pairs.
		  if (NumElts % 2 != 0)
		    return false;

		  for (unsigned i = 0; i < NumElts; i += 2) {
		    int M0 = M[i];
		    int M1 = M[i + 1];

		    // If both elements are undef, new mask is undef too.
		    if (M0 == -1 && M1 == -1) {
		      NewMask.push_back(-1);
		      continue;
		    }

		    // Low half is undef: accept only if the high half is an odd index, i.e.
		    // the second element of an aligned pair.
		    if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
		      NewMask.push_back(M1 / 2);
		      continue;
		    }

		    // Low half is an even index and the high half either follows it directly
		    // or is undef.
		    if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
		      NewMask.push_back(M0 / 2);
		      continue;
		    }

		    // This pair does not map onto a single wider element.
		    NewMask.clear();
		    return false;
		  }

		  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
		  return true;
		}

		// Try to widen the element type to get a new mask value for a better
		// permutation sequence, so that we can use NEON shuffle instructions such as
		// ZIP1/2, UZP1/2, TRN1/2, REV, INS, etc.
		// For example:
		// shufflevector <4 x i32> %a, <4 x i32> %b,
		// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
		// is equivalent to:
		// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
		// Finally, we can get:
		// mov v0.d[0], v1.d[1]
		static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
		SDLoc DL(Op);
		EVT VT = Op.getValueType();
		EVT ScalarVT = VT.getVectorElementType();
		unsigned ElementSize = ScalarVT.getFixedSizeInBits();
		dmgreenUnsubmitted Not Done Reply Inline Actions Can this happen? dmgreen: Can this happen?
		wweiAuthorUnsubmitted Done Reply Inline Actions removed wwei: removed
		SDValue V0 = Op.getOperand(0);
		SDValue V1 = Op.getOperand(1);
		ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

		// If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
		// We need to make sure the wider element type is legal. Thus, ElementSize
		// should not be larger than 32 bits, and the i1 type should also be excluded.
		dmgreenUnsubmitted Not Done Reply Inline Actions `> 32` is probably enough of a check (and != 1 is a good check too). If we are combining adjacent elements, the most we can combine are two i32's into an i64. I think it's the same thing due to legal types, but is a little more clear. The comment above could do with being reworded to be clearer too. dmgreen: `> 32` is probably enough of a check (and != 1 is a good check too). If we are combining…
		wweiAuthorUnsubmitted Done Reply Inline Actions update the comment wwei: update the comment
		if (ElementSize > 32 || ElementSize == 1)
		return SDValue();

		SmallVector<int, 8> NewMask;
		if (isWideTypeMask(Mask, VT, NewMask)) {
		MVT NewEltVT = VT.isFloatingPoint()
		? MVT::getFloatingPointVT(ElementSize * 2)
		dmgreenUnsubmitted Not Done Reply Inline Actions Perhaps make a variable for ScalarVT.getFixedSizeInBits() (or VT.getScalarSizeInBits() which I think should be the same thing) dmgreen: Perhaps make a variable for ScalarVT.getFixedSizeInBits() (or VT.getScalarSizeInBits() which I…
		wweiAuthorUnsubmitted Done Reply Inline Actions add a new variable `unsigned ElementSize` wwei: add a new variable `unsigned ElementSize`
		: MVT::getIntegerVT(ElementSize * 2);
		MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
		if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
		V0 = DAG.getBitcast(NewVT, V0);
		V1 = DAG.getBitcast(NewVT, V1);
		return DAG.getBitcast(VT,
		DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
		}
		}

		return SDValue();
		}

SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,		SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDLoc dl(Op);		SDLoc dl(Op);
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());		ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

if (useSVEForFixedLengthVectorVT(VT))		if (useSVEForFixedLengthVectorVT(VT))
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines	if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;		ScalarVT = MVT::i32;

return DAG.getNode(		return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,		ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),		DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);		DstLaneV);
}		}

		if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
		return NewSD;

// If the shuffle is not directly supported and it has 4 elements, use		// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.		// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();		unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {		if (NumElts == 4) {
unsigned PFIndexes[4];		unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {		for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)		if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;		PFIndexes[i] = 8;
▲ Show 20 Lines • Show All 9,384 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/concat-vector.ll

Show First 20 Lines • Show All 82 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%tmp2 = load <4 x i32>, <4 x i32>* %B		%tmp2 = load <4 x i32>, <4 x i32>* %B
%v8i32 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>		%v8i32 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i32> %v8i32		ret <8 x i32> %v8i32
}		}

define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {		define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
; CHECK-LABEL: concat9:		; CHECK-LABEL: concat9:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #4		; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>		%v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x half> %v4half		ret <4 x half> %v4half
}		}

define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {		define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: concat10:		; CHECK-LABEL: concat10:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
Show All 15 Lines

llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
				dmgreenUnsubmitted Not Done Reply Inline Actions Can you use the update_llc_test_checks script? dmgreen: Can you use the update_llc_test_checks script?
				wweiAuthorUnsubmitted Done Reply Inline Actions updated. wwei: updated.

				define <4 x half> @shuffle1(<2 x half> %a, <2 x half> %b) {
				; CHECK-LABEL: shuffle1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 2, i32 3, i32 0, i32 undef>
				ret <4 x half> %res
				}

				define <4 x half> @shuffle2(<2 x half> %a, <2 x half> %b) {
				; CHECK-LABEL: shuffle2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <2 x half> %a, <2 x half> %b, <4 x i32> <i32 undef, i32 1, i32 2, i32 undef>
				ret <4 x half> %res
				}

				define <4 x i32> @shuffle3(<4 x i32> %a, <4 x i32> %b) {
				; CHECK-LABEL: shuffle3:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov v0.d[0], v1.d[1]
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
				ret <4 x i32> %res
				}

				define <4 x float> @shuffle4(<4 x float> %a, <4 x float> %b) {
				; CHECK-LABEL: shuffle4:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov v0.d[1], v1.d[1]
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
				ret <4 x float> %res
				}

				define <16 x i8> @shuffle5(<16 x i8> %a, <16 x i8> %b) {
				; CHECK-LABEL: shuffle5:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 4, i32 5,
				i32 8, i32 9, i32 12, i32 13,
				i32 16, i32 17, i32 20, i32 21,
				i32 24, i32 25, i32 28, i32 29>
				ret <16 x i8> %res
				}

				define <16 x i8> @shuffle6(<16 x i8> %a, <16 x i8> %b) {
				; CHECK-LABEL: shuffle6:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: trn1 v0.8h, v0.8h, v1.8h
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 17,
				i32 4, i32 5, i32 20, i32 21,
				i32 8, i32 9, i32 24, i32 25,
				i32 12, i32 13, i32 28, i32 29>
				ret <16 x i8> %res
				}

				define <8 x i8> @shuffle7(<8 x i8> %a, <8 x i8> %b) {
				; CHECK-LABEL: shuffle7:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 6, i32 undef,
				i32 undef, i32 11, i32 14, i32 undef>
				ret <8 x i8> %res
				}

				define <8 x i8> @shuffle8(<8 x i8> %a, <8 x i8> %b) {
				; CHECK-LABEL: shuffle8:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 undef,
				i32 undef, i32 7, i32 14, i32 undef>
				ret <8 x i8> %res
				}

				; No blocks
				define <8 x i8> @shuffle9(<8 x i8> %a) {
				; CHECK-LABEL: shuffle9:
				; CHECK: // %bb.0:
				; CHECK-NEXT: rev32 v0.4h, v0.4h
				; CHECK-NEXT: ret
				%res = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1,
				i32 6, i32 7, i32 4, i32 5>
				ret <8 x i8> %res
				}

				define <8 x i16> @shuffle10(<8 x i16> %a) {
				; CHECK-LABEL: shuffle10:
				; CHECK: // %bb.0:
				; CHECK-NEXT: rev64 v0.4s, v0.4s
				; CHECK-NEXT: ret
				%res = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1,
				i32 undef, i32 undef, i32 4, i32 5>
				ret <8 x i16> %res
				}

				define <4 x i16> @shuffle11(<8 x i16> %a, <8 x i16> %b) {
				; CHECK-LABEL: shuffle11:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov v1.s[1], v0.s[0]
				; CHECK-NEXT: fmov d0, d1
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 8, i32 9, i32 0, i32 1>
				ret <4 x i16> %res
				}

				define <8 x i8> @shuffle12(<8 x i8> %a, <8 x i8> %b) {
				; CHECK-LABEL: shuffle12:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
				; CHECK-NEXT: trn2 v0.4h, v0.4h, v0.4h
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 5, i32 4, i32 undef,
				i32 undef, i32 13, i32 12, i32 undef>
				ret <8 x i8> %res
				}

				define <8 x i16> @shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) {
				; CHECK-LABEL: shuffle_widen_faili1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: rev32 v2.4h, v0.4h
				; CHECK-NEXT: rev32 v3.4h, v1.4h
				; CHECK-NEXT: ext v1.8b, v2.8b, v1.8b, #4
				; CHECK-NEXT: ext v0.8b, v3.8b, v0.8b, #4
				; CHECK-NEXT: mov v0.d[1], v1.d[0]
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 0, i32 1,
				i32 3, i32 2, i32 4, i32 5>
				ret <8 x i16> %res
				}

				define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) {
				; CHECK-LABEL: shuffle_widen_fail2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
				; CHECK-NEXT: trn1 v3.4h, v1.4h, v1.4h
				; CHECK-NEXT: ext v1.8b, v2.8b, v1.8b, #4
				; CHECK-NEXT: ext v0.8b, v3.8b, v0.8b, #4
				; CHECK-NEXT: mov v0.d[1], v1.d[0]
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 6, i32 6, i32 0, i32 1,
				i32 undef, i32 2, i32 4, i32 5>
				ret <8 x i16> %res
				}

				define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) {
				; CHECK-LABEL: shuffle_widen_fail3:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: adrp x8, .LCPI14_0
				; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
				; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
				; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
				; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
				; CHECK-NEXT: ret
				entry:
				%res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 5, i32 12, i32 14,
				i32 10, i32 6, i32 7, i32 13>
				ret <8 x i16> %res
				}

llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll

	Show First 20 Lines • Show All 454 Lines • ▼ Show 20 Lines

	;			;
	; f16			; f16
	;			;

	; Don't use SVE for 64-bit vectors.			; Don't use SVE for 64-bit vectors.
	define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) #0 {			define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) #0 {
	; CHECK-LABEL: concat_v4f16:			; CHECK-LABEL: concat_v4f16:
	; CHECK: ext v0.8b, v0.8b, v0.8b, #4			; CHECK: zip1 v0.2s, v0.2s, v1.2s
	; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			%res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	ret <4 x half> %res			ret <4 x half> %res
	}			}

	; Don't use SVE for 128-bit vectors.			; Don't use SVE for 128-bit vectors.
	define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) #0 {			define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) #0 {
	; CHECK-LABEL: concat_v8f16:			; CHECK-LABEL: concat_v8f16:
	▲ Show 20 Lines • Show All 392 Lines • Show Last 20 Lines