This is an archive of the discontinued LLVM Phabricator instance.

[ARM] - Fix lowering of shufflevectors in AArch32
ClosedPublic

Authored by LukeCheeseman on Jul 22 2015, 3:14 AM.

Download Raw Diff

Details

Reviewers

Commits

rG4d45ff2b8701: [ARM] - Fix lowering of shufflevectors in AArch32
rL243103: [ARM] - Fix lowering of shufflevectors in AArch32

Summary

Some shufflevectors are currently being incorrectly lowered in the AArch32 backend. The existing checks that detect NEON operations from the shufflevector instruction expect the shuffle mask and the vector operands to be of the same length.

This is not always the case as the mask may be twice as long as the operand; here only the lower half of the shufflemask gets checked, so provided the lower half of the shufflemask looks like a vector transpose (or even is just all -1 for undef) then the intrinsics may get incorrectly lowered into a vector transpose (VTRN) instruction.

This patch fixes this by accommodating both cases and adds regression tests.

Diff Detail

Event Timeline

LukeCheeseman updated this revision to Diff 30333.Jul 22 2015, 3:14 AM

LukeCheeseman retitled this revision from to [ARM] - Fix lowering of shufflevectors in AArch32.

LukeCheeseman updated this object.

LukeCheeseman added a subscriber: llvm-commits.

Herald added subscribers: rengolin, aemerson. · View Herald TranscriptJul 22 2015, 3:14 AM

Hi Charlie,

The patch looks good to me, but it seems like it could do with some more tests. For example, you have a test with an undef mask for vext but not one with a defined mask, and the other way around for vzip. You also changed the vuzp masks, but haven't added double-size tests for them.

I'm expecting that the regular sized vtrn/vzip/vuzp tests are already somewhere else. Why did you create a new test for these? Why not just append on the existing one?

cheers,
--renato

Adding more tests to check lowering of more shufflemask patterns. Also, a slight change to how WhichResult is set: if the first element of the shufflemask was undef, it would incorrectly set WhichResult to 1 when it should be 0 (in the double-length shufflemask case).

LGTM. Thanks!

This revision is now accepted and ready to land.Jul 23 2015, 1:07 PM

Closed by commit rL243103: [ARM] - Fix lowering of shufflevectors in AArch32 (authored by LukeCheeseman). · Explain WhyJul 24 2015, 2:57 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

ARM/

ARMISelLowering.cpp

165 lines

test/

CodeGen/

ARM/

32 lines

10 lines

21 lines

31 lines

Diff 30507

lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 5,037 Lines • ▼ Show 20 Lines

	static bool isVTBLMask(ArrayRef<int> M, EVT VT) {			static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
	// We can handle <8 x i8> vector shuffles. If the index in the mask is out of			// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
	// range, then 0 is placed into the resulting vector. So pretty much any mask			// range, then 0 is placed into the resulting vector. So pretty much any mask
	// of 8 elements can work here.			// of 8 elements can work here.
	return VT == MVT::v8i8 && M.size() == 8;			return VT == MVT::v8i8 && M.size() == 8;
	}			}

				// Checks whether the shuffle mask represents a vector transpose (VTRN) by
				// checking that pairs of elements in the shuffle mask represent the same index
				// in each vector, incrementing the expected index by 2 at each step.
				// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
				// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
				// v2={e,f,g,h}
				// WhichResult gives the offset for each element in the mask based on which
				// of the two results it belongs to.
				//
				// The transpose can be represented either as:
				// result1 = shufflevector v1, v2, result1_shuffle_mask
				// result2 = shufflevector v1, v2, result2_shuffle_mask
				// where v1/v2 and the shuffle masks have the same number of elements
				// (here WhichResult (see below) indicates which result is being checked)
				//
				// or as:
				// results = shufflevector v1, v2, shuffle_mask
				// where both results are returned in one vector and the shuffle mask has twice
				// as many elements as v1/v2 (here WhichResult will always be 0 if true). In
				// this case we check the low half and the high half of the shuffle mask as if
				// each were a mask of the other form.
	static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {			static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
	for (unsigned i = 0; i < NumElts; i += 2) {			return false;
	if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) \|\|
	(M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))			// If the mask is twice as long as the result then we need to check the upper
				// and lower parts of the mask
				for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
				for (unsigned j = 0; j < NumElts; j += 2) {
				if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) \|\|
				(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
	return false;			return false;
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	return true;			return true;
	}			}

	/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of			/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
	/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".			/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
	/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.			/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
	static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){			static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
	for (unsigned i = 0; i < NumElts; i += 2) {			return false;
	if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) \|\|
	(M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))			for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
				for (unsigned j = 0; j < NumElts; j += 2) {
				if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) \|\|
				(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
	return false;			return false;
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	return true;			return true;
	}			}

				// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
				// that the mask elements are either all even and in steps of size 2 or all odd
				// and in steps of size 2.
				// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
				// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
				// v2={e,f,g,h}
				// Requires similar checks to that of isVTRNMask with
				// respect to how results are returned.
	static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {			static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
	for (unsigned i = 0; i != NumElts; ++i) {			return false;
	if (M[i] < 0) continue; // ignore UNDEF indices
	if ((unsigned) M[i] != 2 * i + WhichResult)			for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
				for (unsigned j = 0; j < NumElts; ++j) {
				if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
	return false;			return false;
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.			// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
	if (VT.is64BitVector() && EltSz == 32)			if (VT.is64BitVector() && EltSz == 32)
	return false;			return false;

	return true;			return true;
	}			}

	/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of			/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
	/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".			/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
	/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,			/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
	static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){			static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned Half = VT.getVectorNumElements() / 2;			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
	for (unsigned j = 0; j != 2; ++j) {			return false;

				unsigned Half = NumElts / 2;
				for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
				for (unsigned j = 0; j < NumElts; j += Half) {
	unsigned Idx = WhichResult;			unsigned Idx = WhichResult;
	for (unsigned i = 0; i != Half; ++i) {			for (unsigned k = 0; k < Half; ++k) {
	int MIdx = M[i + j * Half];			int MIdx = M[i + j + k];
	if (MIdx >= 0 && (unsigned) MIdx != Idx)			if (MIdx >= 0 && (unsigned) MIdx != Idx)
	return false;			return false;
	Idx += 2;			Idx += 2;
	}			}
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.			// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
	if (VT.is64BitVector() && EltSz == 32)			if (VT.is64BitVector() && EltSz == 32)
	return false;			return false;

	return true;			return true;
	}			}

				// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
				// that pairs of elements of the shufflemask represent the same index in each
				// vector incrementing sequentially through the vectors.
				// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
				// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
				// v2={e,f,g,h}
				// Requires similar checks to that of isVTRNMask with respect to how results
				// are returned.
	static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {			static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
				return false;

				for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
	unsigned Idx = WhichResult * NumElts / 2;			unsigned Idx = WhichResult * NumElts / 2;
	for (unsigned i = 0; i != NumElts; i += 2) {			for (unsigned j = 0; j < NumElts; j += 2) {
	if ((M[i] >= 0 && (unsigned) M[i] != Idx) \|\|			if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) \|\|
	(M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))			(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
	return false;			return false;
	Idx += 1;			Idx += 1;
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.			// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
	if (VT.is64BitVector() && EltSz == 32)			if (VT.is64BitVector() && EltSz == 32)
	return false;			return false;

	return true;			return true;
	}			}

	/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of			/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
	/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".			/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
	/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.			/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
	static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){			static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
	unsigned EltSz = VT.getVectorElementType().getSizeInBits();			unsigned EltSz = VT.getVectorElementType().getSizeInBits();
	if (EltSz == 64)			if (EltSz == 64)
	return false;			return false;

	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	WhichResult = (M[0] == 0 ? 0 : 1);			if (M.size() != NumElts && M.size() != NumElts*2)
				return false;

				for (unsigned i = 0; i < M.size(); i += NumElts) {
				WhichResult = M[i] == 0 ? 0 : 1;
	unsigned Idx = WhichResult * NumElts / 2;			unsigned Idx = WhichResult * NumElts / 2;
	for (unsigned i = 0; i != NumElts; i += 2) {			for (unsigned j = 0; j < NumElts; j += 2) {
	if ((M[i] >= 0 && (unsigned) M[i] != Idx) \|\|			if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) \|\|
	(M[i+1] >= 0 && (unsigned) M[i+1] != Idx))			(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
	return false;			return false;
	Idx += 1;			Idx += 1;
	}			}
				}

				if (M.size() == NumElts*2)
				WhichResult = 0;

	// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.			// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
	if (VT.is64BitVector() && EltSz == 32)			if (VT.is64BitVector() && EltSz == 32)
	return false;			return false;

	return true;			return true;
	}			}

	▲ Show 20 Lines • Show All 6,597 Lines • Show Last 20 Lines

test/CodeGen/ARM/vext.ll

Show First 20 Lines • Show All 190 Lines • ▼ Show 20 Lines	; CHECK: vstr
%tmp3 = extractelement <4 x i32> %tmp1, i32 2		%tmp3 = extractelement <4 x i32> %tmp1, i32 2
%tmp4 = trunc i32 %tmp2 to i16		%tmp4 = trunc i32 %tmp2 to i16
%tmp5 = trunc i32 %tmp3 to i16		%tmp5 = trunc i32 %tmp3 to i16
%tmp6 = insertelement <4 x i16> undef, i16 %tmp4, i32 0		%tmp6 = insertelement <4 x i16> undef, i16 %tmp4, i32 0
%tmp7 = insertelement <4 x i16> %tmp6, i16 %tmp5, i32 1		%tmp7 = insertelement <4 x i16> %tmp6, i16 %tmp5, i32 1
store <4 x i16> %tmp7, <4 x i16>* %dest, align 4		store <4 x i16> %tmp7, <4 x i16>* %dest, align 4
ret void		ret void
}		}

		; Shuffles a <2 x i32> operand (second operand undef) with a four-element
		; mask, i.e. a mask twice as long as the operand, whose lower half is
		; entirely undef and whose upper half reverses the operand (1, 0).
		; The directives below require that no vtrn is matched ahead of the
		; expected vrev, which is then followed by a vext.
		define <4 x i32> @test_reverse_and_extract(<2 x i32>* %A) {
		entry:
		; CHECK-LABEL: test_reverse_and_extract
		; CHECK-NOT: vtrn
		; CHECK: vrev
		; CHECK: vext
		%tmp1 = load <2 x i32>, <2 x i32>* %A
		%0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 0>
		ret <4 x i32> %0
		}

		; Shuffles a <2 x i32> operand (second operand undef) with the four-element
		; mask <0, 0, 0, 1>, i.e. a mask twice as long as the operand.
		; The directives below require that no vtrn is matched ahead of the
		; expected vdup, which is then followed by a vext.
		define <4 x i32> @test_dup_and_extract(<2 x i32>* %A) {
		entry:
		; CHECK-LABEL: test_dup_and_extract
		; CHECK-NOT: vtrn
		; CHECK: vdup
		; CHECK: vext
		%tmp1 = load <2 x i32>, <2 x i32>* %A
		%0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
		ret <4 x i32> %0
		}

		; Shuffles a <2 x i32> operand (second operand undef) with the four-element
		; mask <1, 1, 0, 1>, i.e. a mask twice as long as the operand.
		; The directives below expect a vzip followed by a vext.
		define <4 x i32> @test_zip_and_extract(<2 x i32>* %A) {
		entry:
		; CHECK-LABEL: test_zip_and_extract
		; CHECK: vzip
		; CHECK: vext
		%tmp1 = load <2 x i32>, <2 x i32>* %A
		%0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
		ret <4 x i32> %0
		}

test/CodeGen/ARM/vtrn.ll

	Show First 20 Lines • Show All 319 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!			; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
	; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]			; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
	; CHECK-NEXT: mov pc, lr			; CHECK-NEXT: mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A			%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B			%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>			%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
	ret <16 x i16> %tmp3			ret <16 x i16> %tmp3
	}			}

				; Shuffles two <4 x i16> operands with an eight-element mask (twice the
				; operand length). The lower half of the mask is entirely undef; the upper
				; half <1, 5, 3, 7> pairs the odd-indexed elements of the two operands.
				; The directive below expects a vtrn to be emitted.
				define <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
				entry:
				; CHECK-LABEL: vtrn_lower_shufflemask_undef
				; CHECK: vtrn
				%tmp1 = load <4 x i16>, <4 x i16>* %A
				%tmp2 = load <4 x i16>, <4 x i16>* %B
				%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
				ret <8 x i16> %0
				}

test/CodeGen/ARM/vuzp.ll

	Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!			; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
	; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]			; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
	; CHECK-NEXT: mov pc, lr			; CHECK-NEXT: mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A			%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B			%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>			%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
	ret <16 x i16> %tmp3			ret <16 x i16> %tmp3
	}			}

				; Shuffles two <4 x i16> operands with an eight-element mask (twice the
				; operand length). The lower half of the mask is entirely undef; the upper
				; half <1, 3, 5, 7> selects the odd-indexed elements of the concatenated
				; operands. The directive below expects a vuzp to be emitted.
				define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
				entry:
				; CHECK-LABEL: vuzp_lower_shufflemask_undef
				; CHECK: vuzp
				%tmp1 = load <4 x i16>, <4 x i16>* %A
				%tmp2 = load <4 x i16>, <4 x i16>* %B
				%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
				ret <8 x i16> %0
				}

				; Shuffles two <2 x i32> operands with the four-element mask <0, 0, 1, 3>
				; (twice the operand length). The lower half of the mask is all zeros rather
				; than undef; the upper half <1, 3> selects the odd-indexed elements.
				; The directives below require that no vtrn is matched ahead of the
				; expected vuzp.
				define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) {
				entry:
				; CHECK-LABEL: vuzp_lower_shufflemask_zeroed
				; CHECK-NOT: vtrn
				; CHECK: vuzp
				%tmp1 = load <2 x i32>, <2 x i32>* %A
				%tmp2 = load <2 x i32>, <2 x i32>* %B
				%0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
				ret <4 x i32> %0
				}

test/CodeGen/ARM/vzip.ll

	Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!			; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
	; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]			; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
	; CHECK-NEXT: mov pc, lr			; CHECK-NEXT: mov pc, lr
	%tmp1 = load <16 x i8>, <16 x i8>* %A			%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B			%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>			%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
	ret <32 x i8> %tmp3			ret <32 x i8> %tmp3
	}			}

				; Shuffles two <4 x i16> operands with an eight-element mask (twice the
				; operand length). The lower half of the mask is entirely undef; the upper
				; half <2, 6, 3, 7> interleaves the upper elements of the two operands.
				; The directive below expects a vzip to be emitted.
				define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
				entry:
				; CHECK-LABEL: vzip_lower_shufflemask_undef
				; CHECK: vzip
				%tmp1 = load <4 x i16>, <4 x i16>* %A
				%tmp2 = load <4 x i16>, <4 x i16>* %B
				%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
				ret <8 x i16> %0
				}

				; Shuffles a <2 x i32> value with itself (both operands are %tmp1) using the
				; four-element mask <0, 0, 1, 0>, i.e. a mask twice as long as the operand.
				; The directives below require that no vtrn is matched ahead of the
				; expected vzip.
				define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
				entry:
				; CHECK-LABEL: vzip_lower_shufflemask_zeroed
				; CHECK-NOT: vtrn
				; CHECK: vzip
				%tmp1 = load <2 x i32>, <2 x i32>* %A
				%0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
				ret <4 x i32> %0
				}

				; Shuffles a <2 x i32> value with itself (both operands are %tmp1) using the
				; four-element mask <0, 2, 1, 0>, i.e. a mask twice as long as the operand.
				; The lower half <0, 2> on its own looks like an unzip pattern; the
				; directives below require that no vuzp is matched ahead of the expected
				; vzip.
				define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
				entry:
				; CHECK-LABEL: vzip_lower_shufflemask_vuzp
				; CHECK-NOT: vuzp
				; CHECK: vzip
				%tmp1 = load <2 x i32>, <2 x i32>* %A
				%0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
				ret <4 x i32> %0
				}