Diff 507689

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,083 Lines • ▼ Show 20 Lines	static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,

// One source should be low half of first vector.		// One source should be low half of first vector.
if (EvenSrc != 0 && OddSrc != 0)		if (EvenSrc != 0 && OddSrc != 0)
return false;		return false;

return true;		return true;
}		}

		/// Decide whether \p Mask describes a shuffle expressible as
		///   (select (slide v1, V1Offset), (slide v2, V2Offset), selectmask)
		/// That is the case when every lane drawn from a given source vector sits at
		/// one constant distance (destination lane minus lane within that source)
		/// from where it originated.
		///
		/// \param Mask      shuffle mask; indices below Mask.size() refer to v1, the
		///                  remaining indices refer to v2.
		/// \param V1Offset  set on success to the distance shared by all v1 lanes, or
		///                  0 if no lane comes from v1.
		/// \param V2Offset  set on success to the distance shared by all v2 lanes, or
		///                  0 if no lane comes from v2.
		/// \returns true if the mask matches; on false neither output is written.
		static bool isSelectShuffle(ArrayRef<int> Mask, int &V1Offset, int &V2Offset) {
		  const unsigned LaneCount = Mask.size();
		  std::optional<int> SlideForV1, SlideForV2;

		  for (size_t Lane = 0, End = Mask.size(); Lane != End; ++Lane) {
		    const int SrcIdx = Mask[Lane];

		    // TODO: We could handle undef mask indices
		    if (SrcIdx < 0)
		      return false;

		    // Pick the bookkeeping slot of the source this lane reads from. For v2
		    // the destination lane is biased by LaneCount so that subtracting the
		    // raw mask index yields the distance relative to v2 itself.
		    const bool FromV1 = static_cast<unsigned>(SrcIdx) < LaneCount;
		    std::optional<int> &Slide = FromV1 ? SlideForV1 : SlideForV2;
		    const size_t BiasedLane = FromV1 ? Lane : LaneCount + Lane;

		    // The first lane seen from a source fixes that source's distance.
		    if (!Slide) {
		      Slide = BiasedLane - SrcIdx;
		      continue;
		    }

		    // Every later lane from the same source has to agree with it.
		    if (static_cast<unsigned>(SrcIdx) != BiasedLane - *Slide)
		      return false;
		  }

		  // A source that contributed no lanes is reported with a distance of 0.
		  V1Offset = SlideForV1.value_or(0);
		  V2Offset = SlideForV2.value_or(0);
		  return true;
		}

/// Match shuffles that concatenate two vectors, rotate the concatenation,		/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.		/// and then extract the original number of elements from the rotated result.
/// This is equivalent to vector.splice or X86's PALIGNR instruction. The		/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
/// returned rotation amount is for a rotate right, where elements move from		/// returned rotation amount is for a rotate right, where elements move from
/// higher elements to lower elements. \p LoSrc indicates the first source		/// higher elements to lower elements. \p LoSrc indicates the first source
/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector		/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be		/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
/// 0 or 1 if a rotation is found.		/// 0 or 1 if a rotation is found.
▲ Show 20 Lines • Show All 215 Lines • ▼ Show 20 Lines	return DAG.getNode(
DAG.getConstant(0, DL, XLenVT));		DAG.getConstant(0, DL, XLenVT));
}		}

// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx		// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.		// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.		// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,		static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,
SelectionDAG &DAG,		SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {		const RISCVSubtarget &Subtarget) {
		reamesUnsubmitted Not Done Reply Inline Actions white space on the ", tu," bit reames: white space on the ", tu," bit
MVT VecVT = EvenV.getSimpleValueType();		MVT VecVT = EvenV.getSimpleValueType();
MVT VecContainerVT = VecVT; // <vscale x n x ty>		MVT VecContainerVT = VecVT; // <vscale x n x ty>
// Convert fixed vectors to scalable if needed		// Convert fixed vectors to scalable if needed
if (VecContainerVT.isFixedLengthVector()) {		if (VecContainerVT.isFixedLengthVector()) {
VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);		VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);		EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);		OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
}		}
Show All 16 Lines	static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,
EvenV = DAG.getBitcast(VecContainerVT, EvenV);		EvenV = DAG.getBitcast(VecContainerVT, EvenV);
OddV = DAG.getBitcast(VecContainerVT, OddV);		OddV = DAG.getBitcast(VecContainerVT, OddV);

auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);		auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
SDValue Passthru = DAG.getUNDEF(WideContainerVT);		SDValue Passthru = DAG.getUNDEF(WideContainerVT);

// Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with		// Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
// vwaddu.vv		// vwaddu.vv
SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,		SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
		reamesUnsubmitted Not Done Reply Inline Actions In the case where you're inserting a suffix into the InPlace vector, you don't need the TU here and can use TA instead. (e.g. you slide up into the last 4 elements of an 8-element vector) We may already catch that via a DAG combine or during vsetvli insertion, not sure. reames: In the case where you're inserting a suffix into the InPlace vector, you don't need the TU here…
EvenV, OddV, Passthru, Mask, VL);		EvenV, OddV, Passthru, Mask, VL);

// Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)		// Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
SDValue AllOnesVec = DAG.getSplatVector(		SDValue AllOnesVec = DAG.getSplatVector(
VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));		VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,		SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
AllOnesVec, Passthru, Mask, VL);		AllOnesVec, Passthru, Mask, VL);

▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines	if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,		OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
DAG.getConstant(OddSrc % Size, DL, XLenVT));		DAG.getConstant(OddSrc % Size, DL, XLenVT));

return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);		return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}		}

// Detect shuffles which can be re-expressed as vector selects; these are		// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element		// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vectors.		// at the corresponding index in either source vectors, where the source
bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {		// vectors can be slid up or down to make it match.
int MaskIndex = MaskIdx.value();		int V1SelectOffset, V2SelectOffset;
return MaskIndex < 0 \|\| MaskIdx.index() == (unsigned)MaskIndex % NumElts;		bool IsSelect = isSelectShuffle(Mask, V1SelectOffset, V2SelectOffset);
});

assert(!V1.isUndef() && "Unexpected shuffle canonicalization");		assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

SmallVector<SDValue> MaskVals;		SmallVector<SDValue> MaskVals;
// As a backup, shuffles can be lowered via a vrgather instruction, possibly		// As a backup, shuffles can be lowered via a vrgather instruction, possibly
// merged with a second vrgather.		// merged with a second vrgather.
SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;		SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;

Show All 33 Lines	if (SwapOps) {
std::swap(V1, V2);		std::swap(V1, V2);
std::swap(GatherIndicesLHS, GatherIndicesRHS);		std::swap(GatherIndicesLHS, GatherIndicesRHS);
}		}

assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");		assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);		MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);		SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);

if (IsSelect)		if (IsSelect) {
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);		// vslideup/vslidedown either vector if it means we can perform the shuffle
		// by a vmerge
		auto SlideIfNeeded = [&VT, &DL, &DAG, &NumElts](SDValue V,
		unsigned Offset) {
		if (Offset == 0)
		return V;
		// Create a mask like <1, 2, 3, -1> or <-1, 0, 1, 2>
		SmallVector<int, 8> SlideMask(NumElts, -1);
		for (unsigned i = 0; i < NumElts; i++) {
		unsigned MaskIdx = i + Offset;
		if (MaskIdx < 0 \|\| MaskIdx >= NumElts)
		continue;
		SlideMask[MaskIdx] = i;
		}
		return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), SlideMask);
		};

		return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
		reamesUnsubmitted Not Done Reply Inline Actions I don't have a concrete example, but thinking about it, this seems suspect. You're computing the select condition vector using the elements at the original offsets, but actually selecting between elements which have been slid up or down. That doesn't seem right; we should have to adjust the select mask too. reames: I don't have a concrete example, but thinking about it, this seems suspect. You're computing…
		SlideIfNeeded(V1, V1SelectOffset),
		SlideIfNeeded(V2, V2SelectOffset));
		}

if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {		if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
// On such a large vector we're unable to use i8 as the index type.		// On such a large vector we're unable to use i8 as the index type.
// FIXME: We could promote the index to i16 and use vrgatherei16, but that		// FIXME: We could promote the index to i16 and use vrgatherei16, but that
// may involve vector splitting if we're already at LMUL=8, or our		// may involve vector splitting if we're already at LMUL=8, or our
// user-supplied maximum fixed-length LMUL.		// user-supplied maximum fixed-length LMUL.
return SDValue();		return SDValue();
}		}
▲ Show 20 Lines • Show All 11,360 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Show First 20 Lines • Show All 640 Lines • ▼ Show 20 Lines	entry:
%2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>		%2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>		%3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>		%4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
%5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>		%5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %5		ret <4 x i16> %5
}		}

define <8 x i8> @merge_start_into_end(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_start_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end:		; CHECK-LABEL: merge_start_into_end:
		reamesUnsubmitted Not Done Reply Inline Actions Please land the test rename separately. I'd suggest something more generic like concat_4xi8 to avoid the strategy used needing a test rename. reames: Please land the test rename separately. I'd suggest something more generic like concat_4xi8 to…
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: li a0, 15
		reamesUnsubmitted Not Done Reply Inline Actions This is the TA case I'd mentioned above, looks like we don't catch this. reames: This is the TA case I'd mentioned above, looks like we don't catch this.
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 240
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -4		; CHECK-NEXT: vslideup.vi v10, v9, 4
		reamesUnsubmitted Done Reply Inline Actions This can be done with a single slide up. You don't need the select here at all. reames: This can be done with a single slide up. You don't need the select here at all.
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end_non_contiguous:		; CHECK-LABEL: merge_start_into_end_non_contiguous:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: li a0, 111
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 144
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -4		; CHECK-NEXT: vslideup.vi v10, v9, 4
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:		; CHECK-LABEL: merge_end_into_end:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 15		; CHECK-NEXT: li a0, 15
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0		; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_middle:		; CHECK-LABEL: merge_start_into_middle:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: li a0, 225
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 30
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -1		; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:		; CHECK-LABEL: merge_start_into_start:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 240		; CHECK-NEXT: li a0, 240
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0		; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:		; CHECK-LABEL: merge_slidedown:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: li a0, 60
; CHECK-NEXT: vadd.vi v12, v11, 1
; CHECK-NEXT: li a0, 195
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vv v10, v8, v12		; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t		; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

; This should slide %v down by 2 and %w up by 1 before merging them		; This should slide %v down by 2 and %w up by 1 before merging them
define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:		; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vadd.vi v12, v11, 2		; CHECK-NEXT: li a0, 21
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -1		; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

; This shouldn't generate a vmerge because the elements of %w are not consecutive		; This shouldn't generate a vmerge because the elements of %w are not consecutive
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:		; CHECK-LABEL: unmergable:
Show All 16 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

	Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
	; V-NEXT: vnsrl.wi v8, v8, 0			; V-NEXT: vnsrl.wi v8, v8, 0
	; V-NEXT: vse32.v v8, (a1)			; V-NEXT: vse32.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_0_i32:			; ZVE32F-LABEL: vnsrl_0_i32:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma			; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
	; ZVE32F-NEXT: vle32.v v8, (a0)			; ZVE32F-NEXT: vle32.v v8, (a0)
	; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu			; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
	; ZVE32F-NEXT: vslidedown.vi v9, v8, 2			; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
	; ZVE32F-NEXT: li a0, 2			; ZVE32F-NEXT: li a0, 1
	; ZVE32F-NEXT: vmv.s.x v0, a0			; ZVE32F-NEXT: vmv.s.x v0, a0
	; ZVE32F-NEXT: vrgather.vi v10, v8, 0			; ZVE32F-NEXT: vrgather.vi v10, v9, 0
	; ZVE32F-NEXT: vrgather.vi v10, v9, 0, v0.t			; ZVE32F-NEXT: vmerge.vvm v8, v10, v8, v0
	; ZVE32F-NEXT: vse32.v v10, (a1)			; ZVE32F-NEXT: vse32.v v8, (a1)
				lukeAuthorUnsubmitted Done Reply Inline Actions So `rd=x0, rs1=x0` means to keep the existing VL. Could that mean these two vsetivli's could be merged into one `vsetivli zero, 2, e32, m1, tu, ma`, seeing as the second `tu` trumps the former `ta`? luke: So `rd=x0, rs1=x0` means to keep the existing VL. Could that mean these two vsetivli's could be…
				reamesUnsubmitted Not Done Reply Inline Actions Yes, though we can also just do better and use a single slidedown to form the shuffle here. Seems like a reasonable follow up patch. reames: Yes, though we can also just do better and use a single slidedown to form the shuffle here.
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x i32>, ptr %in, align 4			%0 = load <4 x i32>, ptr %in, align 4
	%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>			%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
	store <2 x i32> %shuffle.i5, ptr %out, align 4			store <2 x i32> %shuffle.i5, ptr %out, align 4
	ret void			ret void
	}			}

	define void @vnsrl_32_i32(ptr %in, ptr %out) {			define void @vnsrl_32_i32(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_32_i32:			; V-LABEL: vnsrl_32_i32:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
	; V-NEXT: vle32.v v8, (a0)			; V-NEXT: vle32.v v8, (a0)
	; V-NEXT: li a0, 32			; V-NEXT: li a0, 32
	; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
	; V-NEXT: vnsrl.wx v8, v8, a0			; V-NEXT: vnsrl.wx v8, v8, a0
	; V-NEXT: vse32.v v8, (a1)			; V-NEXT: vse32.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_32_i32:			; ZVE32F-LABEL: vnsrl_32_i32:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma			; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
	; ZVE32F-NEXT: vle32.v v8, (a0)			; ZVE32F-NEXT: vle32.v v8, (a0)
	; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu			; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
	; ZVE32F-NEXT: vslidedown.vi v9, v8, 2			; ZVE32F-NEXT: li a0, 1
	; ZVE32F-NEXT: li a0, 2
	; ZVE32F-NEXT: vmv.s.x v0, a0			; ZVE32F-NEXT: vmv.s.x v0, a0
	; ZVE32F-NEXT: vrgather.vi v10, v8, 1			; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
	; ZVE32F-NEXT: vrgather.vi v10, v9, 1, v0.t			; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
	; ZVE32F-NEXT: vse32.v v10, (a1)			; ZVE32F-NEXT: vse32.v v9, (a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x i32>, ptr %in, align 4			%0 = load <4 x i32>, ptr %in, align 4
	%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>			%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
	store <2 x i32> %shuffle.i5, ptr %out, align 4			store <2 x i32> %shuffle.i5, ptr %out, align 4
	ret void			ret void
	}			}

	define void @vnsrl_0_float(ptr %in, ptr %out) {			define void @vnsrl_0_float(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_0_float:			; V-LABEL: vnsrl_0_float:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
	; V-NEXT: vle32.v v8, (a0)			; V-NEXT: vle32.v v8, (a0)
	; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
	; V-NEXT: vnsrl.wi v8, v8, 0			; V-NEXT: vnsrl.wi v8, v8, 0
	; V-NEXT: vse32.v v8, (a1)			; V-NEXT: vse32.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_0_float:			; ZVE32F-LABEL: vnsrl_0_float:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma			; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
	; ZVE32F-NEXT: vle32.v v8, (a0)			; ZVE32F-NEXT: vle32.v v8, (a0)
	; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu			; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
	; ZVE32F-NEXT: vslidedown.vi v9, v8, 2			; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
	; ZVE32F-NEXT: li a0, 2			; ZVE32F-NEXT: li a0, 1
	; ZVE32F-NEXT: vmv.s.x v0, a0			; ZVE32F-NEXT: vmv.s.x v0, a0
	; ZVE32F-NEXT: vrgather.vi v10, v8, 0			; ZVE32F-NEXT: vrgather.vi v10, v9, 0
	; ZVE32F-NEXT: vrgather.vi v10, v9, 0, v0.t			; ZVE32F-NEXT: vmerge.vvm v8, v10, v8, v0
	; ZVE32F-NEXT: vse32.v v10, (a1)			; ZVE32F-NEXT: vse32.v v8, (a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x float>, ptr %in, align 4			%0 = load <4 x float>, ptr %in, align 4
	%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>			%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
	store <2 x float> %shuffle.i5, ptr %out, align 4			store <2 x float> %shuffle.i5, ptr %out, align 4
	ret void			ret void
	}			}

	define void @vnsrl_32_float(ptr %in, ptr %out) {			define void @vnsrl_32_float(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_32_float:			; V-LABEL: vnsrl_32_float:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
	; V-NEXT: vle32.v v8, (a0)			; V-NEXT: vle32.v v8, (a0)
	; V-NEXT: li a0, 32			; V-NEXT: li a0, 32
	; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma			; V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
	; V-NEXT: vnsrl.wx v8, v8, a0			; V-NEXT: vnsrl.wx v8, v8, a0
	; V-NEXT: vse32.v v8, (a1)			; V-NEXT: vse32.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_32_float:			; ZVE32F-LABEL: vnsrl_32_float:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma			; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
	; ZVE32F-NEXT: vle32.v v8, (a0)			; ZVE32F-NEXT: vle32.v v8, (a0)
	; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu			; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
	; ZVE32F-NEXT: vslidedown.vi v9, v8, 2			; ZVE32F-NEXT: li a0, 1
	; ZVE32F-NEXT: li a0, 2
	; ZVE32F-NEXT: vmv.s.x v0, a0			; ZVE32F-NEXT: vmv.s.x v0, a0
	; ZVE32F-NEXT: vrgather.vi v10, v8, 1			; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
	; ZVE32F-NEXT: vrgather.vi v10, v9, 1, v0.t			; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t
	; ZVE32F-NEXT: vse32.v v10, (a1)			; ZVE32F-NEXT: vse32.v v9, (a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x float>, ptr %in, align 4			%0 = load <4 x float>, ptr %in, align 4
	%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>			%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
	store <2 x float> %shuffle.i5, ptr %out, align 4			store <2 x float> %shuffle.i5, ptr %out, align 4
	ret void			ret void
	}			}

	define void @vnsrl_0_i64(ptr %in, ptr %out) {			define void @vnsrl_0_i64(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_0_i64:			; V-LABEL: vnsrl_0_i64:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma			; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
	; V-NEXT: vle64.v v8, (a0)			; V-NEXT: vle64.v v8, (a0)
	; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
	; V-NEXT: vslidedown.vi v9, v8, 2			; V-NEXT: vslidedown.vi v9, v8, 2
	; V-NEXT: li a0, 2			; V-NEXT: li a0, 1
	; V-NEXT: vmv.s.x v0, a0			; V-NEXT: vmv.s.x v0, a0
	; V-NEXT: vrgather.vi v10, v8, 0			; V-NEXT: vrgather.vi v10, v9, 0
	; V-NEXT: vrgather.vi v10, v9, 0, v0.t			; V-NEXT: vmerge.vvm v8, v10, v8, v0
	; V-NEXT: vse64.v v10, (a1)			; V-NEXT: vse64.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_0_i64:			; ZVE32F-LABEL: vnsrl_0_i64:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: ld a2, 16(a0)			; ZVE32F-NEXT: ld a2, 16(a0)
	; ZVE32F-NEXT: ld a0, 0(a0)			; ZVE32F-NEXT: ld a0, 0(a0)
	; ZVE32F-NEXT: sd a2, 8(a1)			; ZVE32F-NEXT: sd a2, 8(a1)
	; ZVE32F-NEXT: sd a0, 0(a1)			; ZVE32F-NEXT: sd a0, 0(a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x i64>, ptr %in, align 8			%0 = load <4 x i64>, ptr %in, align 8
	%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>			%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
	store <2 x i64> %shuffle.i5, ptr %out, align 8			store <2 x i64> %shuffle.i5, ptr %out, align 8
	ret void			ret void
	}			}

	define void @vnsrl_64_i64(ptr %in, ptr %out) {			define void @vnsrl_64_i64(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_64_i64:			; V-LABEL: vnsrl_64_i64:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma			; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
	; V-NEXT: vle64.v v8, (a0)			; V-NEXT: vle64.v v8, (a0)
	; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; V-NEXT: vslidedown.vi v9, v8, 2			; V-NEXT: li a0, 1
	; V-NEXT: li a0, 2
	; V-NEXT: vmv.s.x v0, a0			; V-NEXT: vmv.s.x v0, a0
	; V-NEXT: vrgather.vi v10, v8, 1			; V-NEXT: vslidedown.vi v9, v8, 2
	; V-NEXT: vrgather.vi v10, v9, 1, v0.t			; V-NEXT: vrgather.vi v9, v8, 1, v0.t
	; V-NEXT: vse64.v v10, (a1)			; V-NEXT: vse64.v v9, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_64_i64:			; ZVE32F-LABEL: vnsrl_64_i64:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: ld a2, 24(a0)			; ZVE32F-NEXT: ld a2, 24(a0)
	; ZVE32F-NEXT: ld a0, 8(a0)			; ZVE32F-NEXT: ld a0, 8(a0)
	; ZVE32F-NEXT: sd a2, 8(a1)			; ZVE32F-NEXT: sd a2, 8(a1)
	; ZVE32F-NEXT: sd a0, 0(a1)			; ZVE32F-NEXT: sd a0, 0(a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x i64>, ptr %in, align 8			%0 = load <4 x i64>, ptr %in, align 8
	%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>			%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
	store <2 x i64> %shuffle.i5, ptr %out, align 8			store <2 x i64> %shuffle.i5, ptr %out, align 8
	ret void			ret void
	}			}

	define void @vnsrl_0_double(ptr %in, ptr %out) {			define void @vnsrl_0_double(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_0_double:			; V-LABEL: vnsrl_0_double:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma			; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
	; V-NEXT: vle64.v v8, (a0)			; V-NEXT: vle64.v v8, (a0)
	; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
	; V-NEXT: vslidedown.vi v9, v8, 2			; V-NEXT: vslidedown.vi v9, v8, 2
	; V-NEXT: li a0, 2			; V-NEXT: li a0, 1
	; V-NEXT: vmv.s.x v0, a0			; V-NEXT: vmv.s.x v0, a0
	; V-NEXT: vrgather.vi v10, v8, 0			; V-NEXT: vrgather.vi v10, v9, 0
	; V-NEXT: vrgather.vi v10, v9, 0, v0.t			; V-NEXT: vmerge.vvm v8, v10, v8, v0
	; V-NEXT: vse64.v v10, (a1)			; V-NEXT: vse64.v v8, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_0_double:			; ZVE32F-LABEL: vnsrl_0_double:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: ld a2, 16(a0)			; ZVE32F-NEXT: ld a2, 16(a0)
	; ZVE32F-NEXT: ld a0, 0(a0)			; ZVE32F-NEXT: ld a0, 0(a0)
	; ZVE32F-NEXT: sd a2, 8(a1)			; ZVE32F-NEXT: sd a2, 8(a1)
	; ZVE32F-NEXT: sd a0, 0(a1)			; ZVE32F-NEXT: sd a0, 0(a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x double>, ptr %in, align 8			%0 = load <4 x double>, ptr %in, align 8
	%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>			%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
	store <2 x double> %shuffle.i5, ptr %out, align 8			store <2 x double> %shuffle.i5, ptr %out, align 8
	ret void			ret void
	}			}

	define void @vnsrl_64_double(ptr %in, ptr %out) {			define void @vnsrl_64_double(ptr %in, ptr %out) {
	; V-LABEL: vnsrl_64_double:			; V-LABEL: vnsrl_64_double:
	; V: # %bb.0: # %entry			; V: # %bb.0: # %entry
	; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma			; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
	; V-NEXT: vle64.v v8, (a0)			; V-NEXT: vle64.v v8, (a0)
	; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; V-NEXT: vslidedown.vi v9, v8, 2			; V-NEXT: li a0, 1
	; V-NEXT: li a0, 2
	; V-NEXT: vmv.s.x v0, a0			; V-NEXT: vmv.s.x v0, a0
	; V-NEXT: vrgather.vi v10, v8, 1			; V-NEXT: vslidedown.vi v9, v8, 2
	; V-NEXT: vrgather.vi v10, v9, 1, v0.t			; V-NEXT: vrgather.vi v9, v8, 1, v0.t
	; V-NEXT: vse64.v v10, (a1)			; V-NEXT: vse64.v v9, (a1)
	; V-NEXT: ret			; V-NEXT: ret
	;			;
	; ZVE32F-LABEL: vnsrl_64_double:			; ZVE32F-LABEL: vnsrl_64_double:
	; ZVE32F: # %bb.0: # %entry			; ZVE32F: # %bb.0: # %entry
	; ZVE32F-NEXT: ld a2, 24(a0)			; ZVE32F-NEXT: ld a2, 24(a0)
	; ZVE32F-NEXT: ld a0, 8(a0)			; ZVE32F-NEXT: ld a0, 8(a0)
	; ZVE32F-NEXT: sd a2, 8(a1)			; ZVE32F-NEXT: sd a2, 8(a1)
	; ZVE32F-NEXT: sd a0, 0(a1)			; ZVE32F-NEXT: sd a0, 0(a1)
	; ZVE32F-NEXT: ret			; ZVE32F-NEXT: ret
	entry:			entry:
	%0 = load <4 x double>, ptr %in, align 8			%0 = load <4 x double>, ptr %in, align 8
	%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>			%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
	store <2 x double> %shuffle.i5, ptr %out, align 8			store <2 x double> %shuffle.i5, ptr %out, align 8
	ret void			ret void
	}			}

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

	Show First 20 Lines • Show All 285 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %vec)			%retval = call {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %vec)
	ret {<4 x i32>, <4 x i32>} %retval			ret {<4 x i32>, <4 x i32>} %retval
	}			}

	define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {			define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
	; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:			; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma			; CHECK-NEXT: li a0, 1
	; CHECK-NEXT: vslidedown.vi v12, v8, 2			; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
	; CHECK-NEXT: li a0, 2
	; CHECK-NEXT: vmv.s.x v0, a0			; CHECK-NEXT: vmv.s.x v0, a0
				; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
				; CHECK-NEXT: vslidedown.vi v10, v8, 2
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; CHECK-NEXT: vrgather.vi v10, v8, 0			; CHECK-NEXT: vrgather.vi v9, v10, 0
	; CHECK-NEXT: vrgather.vi v10, v12, 0, v0.t			; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
	; CHECK-NEXT: vrgather.vi v9, v8, 1			; CHECK-NEXT: vrgather.vi v10, v8, 1, v0.t
	; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t			; CHECK-NEXT: vmv.v.v v8, v9
	; CHECK-NEXT: vmv.v.v v8, v10			; CHECK-NEXT: vmv.v.v v9, v10
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec)			%retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec)
	ret {<2 x i64>, <2 x i64>} %retval			ret {<2 x i64>, <2 x i64>} %retval
	}			}

	declare {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1>)			declare {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1>)
	declare {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8>)			declare {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8>)
	declare {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16>)			declare {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16>)
	▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)			%retval = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)
	ret {<4 x float>, <4 x float>} %retval			ret {<4 x float>, <4 x float>} %retval
	}			}

	define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {			define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {
	; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:			; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma			; CHECK-NEXT: li a0, 1
	; CHECK-NEXT: vslidedown.vi v12, v8, 2			; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
	; CHECK-NEXT: li a0, 2
	; CHECK-NEXT: vmv.s.x v0, a0			; CHECK-NEXT: vmv.s.x v0, a0
				; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
				; CHECK-NEXT: vslidedown.vi v10, v8, 2
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; CHECK-NEXT: vrgather.vi v10, v8, 0			; CHECK-NEXT: vrgather.vi v9, v10, 0
	; CHECK-NEXT: vrgather.vi v10, v12, 0, v0.t			; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
	; CHECK-NEXT: vrgather.vi v9, v8, 1			; CHECK-NEXT: vrgather.vi v10, v8, 1, v0.t
	; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t			; CHECK-NEXT: vmv.v.v v8, v9
	; CHECK-NEXT: vmv.v.v v8, v10			; CHECK-NEXT: vmv.v.v v9, v10
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec)			%retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec)
	ret {<2 x double>, <2 x double>} %retval			ret {<2 x double>, <2 x double>} %retval
	}			}

	declare {<2 x half>,<2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)			declare {<2 x half>,<2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
	declare {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half>)			declare {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half>)
	declare {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float>)			declare {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float>)
	declare {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half>)			declare {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half>)
	declare {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float>)			declare {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float>)
	declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double>)			declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double>)

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 507689

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 507689

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic