Diff 508055

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,309 Lines • ▼ Show 20 Lines	SDValue Slidedown =
convertToScalableVector(ContainerVT, Src, DAG, Subtarget),		convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);		DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
return DAG.getNode(		return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, VT,		ISD::EXTRACT_SUBVECTOR, DL, VT,
convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),		convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
DAG.getConstant(0, DL, XLenVT));		DAG.getConstant(0, DL, XLenVT));
}		}

		// Because vslideup leaves the destination elements at the start intact, we can
		// use it to perform shuffles that insert subvectors:
		//
		// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
		// ->
		// vsetvli zero, 8, e8, mf2, tu, ma
		[Inline comment — reames (unsubmitted, not done)]: White space on the ", tu," bit.
		// vslideup.vi v8, v9, 4
		//
		// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 8, 9, 10, 5, 6, 7>
		// ->
		// vsetvli zero, 5, e8, mf2, tu, ma
		// vslideup.vi v8, v9, 2
		// Tries to lower a fixed-length two-operand shuffle as a single vslideup.
		// Returns the lowered value converted back to VT, or an empty SDValue when
		// Mask is not an insert-subvector mask so the caller can try other strategies.
		static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
		SDValue V1, SDValue V2,
		ArrayRef<int> Mask,
		const RISCVSubtarget &Subtarget,
		SelectionDAG &DAG) {
		unsigned NumElts = VT.getVectorNumElements();
		// On success, NumSubElts receives the number of inserted elements and Index
		// the position they are inserted at; both are used below for the slide
		// amount and VL.
		int NumSubElts, Index;
		if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
		Index))
		return SDValue();

		// Mask[Index] is the first inserted element. If it is below NumElts it reads
		// from V1, so V1 is the vector being inserted and V2 is the one kept in
		// place; otherwise the roles are the other way round.
		bool OpsSwapped = Mask[Index] < (int)NumElts;
		SDValue InPlace = OpsSwapped ? V2 : V1;
		SDValue ToInsert = OpsSwapped ? V1 : V2;

		MVT XLenVT = Subtarget.getXLenVT();
		MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
		// Only the mask half of the (mask, VL) pair is kept; an explicit VL is passed
		// to the slideup below instead of the default one.
		auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
		// We slide up by the index that the subvector is being inserted at, and set
		// VL to the index + the number of elements being inserted
		// The policy operand requests tail-undisturbed, mask-agnostic behaviour
		// (printed as "tu, ma" in the test expectations), presumably so the
		// elements of InPlace past VL are left as they are.
		SDValue Slideup = getVSlideup(
		DAG, Subtarget, DL, ContainerVT,
		convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget),
		convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget),
		DAG.getConstant(Index, DL, XLenVT), TrueMask,
		DAG.getConstant(NumSubElts + Index, DL, XLenVT),
		RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED \| RISCVII::MASK_AGNOSTIC);
		reamesUnsubmitted Not Done Reply Inline Actions In the case where you're inserting a suffix into the InPlace vector, you don't need the TU here and can use TA instead. (e.g. you slide up into the last 4 elements of a 8 long vector) We may already catch that via a DAG combine or during vsetvli insertion, not sure. reames: In the case where you're inserting a suffix into the InPlace vector, you don't need the TU here…
		return convertFromScalableVector(VT, Slideup, DAG, Subtarget);
		}

// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx		// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.		// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.		// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,		static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, SDLoc &DL,
SelectionDAG &DAG,		SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {		const RISCVSubtarget &Subtarget) {
MVT VecVT = EvenV.getSimpleValueType();		MVT VecVT = EvenV.getSimpleValueType();
MVT VecContainerVT = VecVT; // <vscale x n x ty>		MVT VecContainerVT = VecVT; // <vscale x n x ty>
▲ Show 20 Lines • Show All 220 Lines • ▼ Show 20 Lines	if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
assert(OddSrc >= 0 && "Undef source?");		assert(OddSrc >= 0 && "Undef source?");
OddV = (OddSrc / Size) == 0 ? V1 : V2;		OddV = (OddSrc / Size) == 0 ? V1 : V2;
OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,		OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
DAG.getConstant(OddSrc % Size, DL, XLenVT));		DAG.getConstant(OddSrc % Size, DL, XLenVT));

return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);		return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}		}

		if (SDValue V =
		lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
		return V;

// Detect shuffles which can be re-expressed as vector selects; these are		// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element		// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vectors.		// at the corresponding index in either source vectors.
bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {		bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
int MaskIndex = MaskIdx.value();		int MaskIndex = MaskIdx.value();
return MaskIndex < 0 \|\| MaskIdx.index() == (unsigned)MaskIndex % NumElts;		return MaskIndex < 0 \|\| MaskIdx.index() == (unsigned)MaskIndex % NumElts;
});		});

▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;		unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;		unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();		MVT IndexVT = VT.changeTypeToInteger();
// Since we can't introduce illegal index types at this stage, use i16 and		// Since we can't introduce illegal index types at this stage, use i16 and
// vrgatherei16 if the corresponding index type for plain vrgather is greater		// vrgatherei16 if the corresponding index type for plain vrgather is greater
// than XLenVT.		// than XLenVT.
if (IndexVT.getScalarType().bitsGT(XLenVT)) {		if (IndexVT.getScalarType().bitsGT(XLenVT)) {
GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;		GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
IndexVT = IndexVT.changeVectorElementType(MVT::i16);		IndexVT = IndexVT.changeVectorElementType(MVT::i16);
		[Inline comment — reames (unsubmitted, not done)]: I don't have a concrete example, but thinking about it, this seems suspect. You're computing the select condition vector using the elements at the original offsets, but actually selecting between elements which have been slid up or down. That doesn't seem right; we should have to adjust the select mask too.
}		}

MVT IndexContainerVT =		MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());		ContainerVT.changeVectorElementType(IndexVT.getScalarType());

SDValue Gather;		SDValue Gather;
// TODO: This doesn't trigger for i64 vectors on RV32, since there we		// TODO: This doesn't trigger for i64 vectors on RV32, since there we
// encounter a bitcasted BUILD_VECTOR with low/high i32 values.		// encounter a bitcasted BUILD_VECTOR with low/high i32 values.
▲ Show 20 Lines • Show All 11,342 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

	Show All 35 Lines
	; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization:			; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization:
	; LMULMAX1: # %bb.0:			; LMULMAX1: # %bb.0:
	; LMULMAX1-NEXT: li a0, 2			; LMULMAX1-NEXT: li a0, 2
	; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma			; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
	; LMULMAX1-NEXT: vmv.s.x v0, a0			; LMULMAX1-NEXT: vmv.s.x v0, a0
	; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu			; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
	; LMULMAX1-NEXT: vrgather.vi v12, v8, 0			; LMULMAX1-NEXT: vrgather.vi v12, v8, 0
	; LMULMAX1-NEXT: vrgather.vi v12, v9, 3, v0.t			; LMULMAX1-NEXT: vrgather.vi v12, v9, 3, v0.t
	; LMULMAX1-NEXT: li a0, 8			; LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma
	; LMULMAX1-NEXT: vmv.s.x v0, a0			; LMULMAX1-NEXT: vslideup.vi v11, v10, 2
	; LMULMAX1-NEXT: vrgather.vi v9, v10, 0			; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, ma
	; LMULMAX1-NEXT: li a0, 3			; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
	; LMULMAX1-NEXT: vmv.s.x v8, a0			; LMULMAX1-NEXT: vmv1r.v v8, v11
	; LMULMAX1-NEXT: vrgather.vi v9, v11, 3, v0.t
	; LMULMAX1-NEXT: vmv.v.v v0, v8
	; LMULMAX1-NEXT: vmerge.vvm v8, v9, v12, v0
	; LMULMAX1-NEXT: ret			; LMULMAX1-NEXT: ret
	;			;
	; LMULMAX2-LABEL: hang_when_merging_stores_after_legalization:			; LMULMAX2-LABEL: hang_when_merging_stores_after_legalization:
	; LMULMAX2: # %bb.0:			; LMULMAX2: # %bb.0:
	; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu			; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
	; LMULMAX2-NEXT: vid.v v12			; LMULMAX2-NEXT: vid.v v12
	; LMULMAX2-NEXT: li a0, 7			; LMULMAX2-NEXT: li a0, 7
	; LMULMAX2-NEXT: vmul.vx v14, v12, a0			; LMULMAX2-NEXT: vmul.vx v14, v12, a0
	▲ Show 20 Lines • Show All 233 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Show First 20 Lines • Show All 639 Lines • ▼ Show 20 Lines	entry:
%1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>		%1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>		%2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>		%3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>		%4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
%5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>		%5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i16> %5		ret <4 x i16> %5
}		}

define <8 x i8> @merge_start_into_end(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @slideup_into_end(<8 x i8> %v, <8 x i8> %w) {
		[Inline comment — reames (unsubmitted, not done)]: Please land the test rename separately. I'd suggest something more generic like concat_4xi8 to avoid the strategy used needing a test rename.
; CHECK-LABEL: merge_start_into_end:		; CHECK-LABEL: slideup_into_end:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
		[Inline comment — reames (unsubmitted, not done)]: This is the TA case I'd mentioned above; looks like we don't catch this.
		; CHECK-NEXT: vslideup.vi v8, v9, 4
		; CHECK-NEXT: ret
		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
		ret <8 x i8> %res
		}

		define <8 x i8> @slideup_into_end_undef(<8 x i8> %v, <8 x i8> %w) {
		; CHECK-LABEL: slideup_into_end_undef:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
		; CHECK-NEXT: vslideup.vi v8, v9, 4
		; CHECK-NEXT: ret
		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 10, i32 11>
		ret <8 x i8> %res
		}

		define <8 x i8> @slideup_into_end_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
		; CHECK-LABEL: slideup_into_end_undef_at_start:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrgather.vv v10, v8, v11		; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 240		; CHECK-NEXT: li a0, 224
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -4		; CHECK-NEXT: vadd.vi v8, v11, -4
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10		; CHECK-NEXT: vmv1r.v v8, v10
		[Inline comment — reames (unsubmitted, done)]: This can be done with a single slide up. You don't need the select here at all.
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 9, i32 10, i32 11>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_end_non_contiguous:		; CHECK-LABEL: merge_start_into_end_non_contiguous:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrgather.vv v10, v8, v11		; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 144		; CHECK-NEXT: li a0, 144
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -4		; CHECK-NEXT: vadd.vi v8, v11, -4
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t		; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10		; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @slideup_end_into_end(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_end_into_end:		; CHECK-LABEL: slideup_end_into_end:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 15		; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma		; CHECK-NEXT: vslideup.vi v9, v8, 0
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @slideup_into_middle(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_middle:		; CHECK-LABEL: slideup_into_middle:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; CHECK-NEXT: vid.v v11		; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 30
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vadd.vi v8, v11, -1
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @slideup_start_into_start(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_start_into_start:		; CHECK-LABEL: slideup_start_into_start:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 240		; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma		; CHECK-NEXT: vslideup.vi v8, v9, 0
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: merge_slidedown:		; CHECK-LABEL: merge_slidedown:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
Show All 30 Lines

; This shouldn't generate a vmerge because the elements of %w are not consecutive		; This shouldn't generate a vmerge because the elements of %w are not consecutive
define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {		define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:		; CHECK-LABEL: unmergable:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vid.v v10		; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vadd.vi v11, v10, 2		; CHECK-NEXT: vadd.vi v11, v10, 2
; CHECK-NEXT: lui a0, %hi(.LCPI44_0)		; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0)		; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT: vle8.v v12, (a0)		; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: li a0, 234		; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0		; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vv v10, v8, v11		; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t		; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t
; CHECK-NEXT: vmv1r.v v8, v10		; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>		%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
ret <8 x i8> %res		ret <8 x i8> %res
}		}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
; V-NEXT: vnsrl.wi v8, v8, 0		; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse32.v v8, (a1)		; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret		; V-NEXT: ret
;		;
; ZVE32F-LABEL: vnsrl_0_i32:		; ZVE32F-LABEL: vnsrl_0_i32:
; ZVE32F: # %bb.0: # %entry		; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma		; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)		; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu		; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2		; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: li a0, 2		; ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
		[Inline comment — luke (author, unsubmitted, done)]: So `rd=x0, rs1=x0` means to keep the existing VL. Could that mean these two vsetivli's could be merged into one `vsetivli zero, 2, e32, m1, tu, ma`, seeing as the second `tu` trumps the former `ta`?
		[Inline comment — reames (unsubmitted, not done)]: Yes, though we can also just do better and use a single slidedown to form the shuffle here. Seems like a reasonable follow-up patch.
; ZVE32F-NEXT: vmv.s.x v0, a0		; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vrgather.vi v10, v8, 0		; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: vrgather.vi v10, v9, 0, v0.t
; ZVE32F-NEXT: vse32.v v10, (a1)
; ZVE32F-NEXT: ret		; ZVE32F-NEXT: ret
entry:		entry:
%0 = load <4 x i32>, ptr %in, align 4		%0 = load <4 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>		%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
store <2 x i32> %shuffle.i5, ptr %out, align 4		store <2 x i32> %shuffle.i5, ptr %out, align 4
ret void		ret void
}		}

Show All 36 Lines
; V-NEXT: vnsrl.wi v8, v8, 0		; V-NEXT: vnsrl.wi v8, v8, 0
; V-NEXT: vse32.v v8, (a1)		; V-NEXT: vse32.v v8, (a1)
; V-NEXT: ret		; V-NEXT: ret
;		;
; ZVE32F-LABEL: vnsrl_0_float:		; ZVE32F-LABEL: vnsrl_0_float:
; ZVE32F: # %bb.0: # %entry		; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma		; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVE32F-NEXT: vle32.v v8, (a0)		; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu		; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2		; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; ZVE32F-NEXT: li a0, 2		; ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; ZVE32F-NEXT: vmv.s.x v0, a0		; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vrgather.vi v10, v8, 0		; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: vrgather.vi v10, v9, 0, v0.t
; ZVE32F-NEXT: vse32.v v10, (a1)
; ZVE32F-NEXT: ret		; ZVE32F-NEXT: ret
entry:		entry:
%0 = load <4 x float>, ptr %in, align 4		%0 = load <4 x float>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>		%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
store <2 x float> %shuffle.i5, ptr %out, align 4		store <2 x float> %shuffle.i5, ptr %out, align 4
ret void		ret void
}		}

Show All 27 Lines	entry:
ret void		ret void
}		}

define void @vnsrl_0_i64(ptr %in, ptr %out) {		define void @vnsrl_0_i64(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_i64:		; V-LABEL: vnsrl_0_i64:
; V: # %bb.0: # %entry		; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma		; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)		; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu		; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: vslidedown.vi v9, v8, 2		; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: li a0, 2		; V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; V-NEXT: vmv.s.x v0, a0		; V-NEXT: vslideup.vi v8, v9, 1
; V-NEXT: vrgather.vi v10, v8, 0		; V-NEXT: vse64.v v8, (a1)
; V-NEXT: vrgather.vi v10, v9, 0, v0.t
; V-NEXT: vse64.v v10, (a1)
; V-NEXT: ret		; V-NEXT: ret
;		;
; ZVE32F-LABEL: vnsrl_0_i64:		; ZVE32F-LABEL: vnsrl_0_i64:
; ZVE32F: # %bb.0: # %entry		; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 16(a0)		; ZVE32F-NEXT: ld a2, 16(a0)
; ZVE32F-NEXT: ld a0, 0(a0)		; ZVE32F-NEXT: ld a0, 0(a0)
; ZVE32F-NEXT: sd a2, 8(a1)		; ZVE32F-NEXT: sd a2, 8(a1)
; ZVE32F-NEXT: sd a0, 0(a1)		; ZVE32F-NEXT: sd a0, 0(a1)
Show All 33 Lines	entry:
ret void		ret void
}		}

define void @vnsrl_0_double(ptr %in, ptr %out) {		define void @vnsrl_0_double(ptr %in, ptr %out) {
; V-LABEL: vnsrl_0_double:		; V-LABEL: vnsrl_0_double:
; V: # %bb.0: # %entry		; V: # %bb.0: # %entry
; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma		; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V-NEXT: vle64.v v8, (a0)		; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu		; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; V-NEXT: vslidedown.vi v9, v8, 2		; V-NEXT: vslidedown.vi v9, v8, 2
; V-NEXT: li a0, 2		; V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; V-NEXT: vmv.s.x v0, a0		; V-NEXT: vslideup.vi v8, v9, 1
; V-NEXT: vrgather.vi v10, v8, 0		; V-NEXT: vse64.v v8, (a1)
; V-NEXT: vrgather.vi v10, v9, 0, v0.t
; V-NEXT: vse64.v v10, (a1)
; V-NEXT: ret		; V-NEXT: ret
;		;
; ZVE32F-LABEL: vnsrl_0_double:		; ZVE32F-LABEL: vnsrl_0_double:
; ZVE32F: # %bb.0: # %entry		; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: ld a2, 16(a0)		; ZVE32F-NEXT: ld a2, 16(a0)
; ZVE32F-NEXT: ld a0, 0(a0)		; ZVE32F-NEXT: ld a0, 0(a0)
; ZVE32F-NEXT: sd a2, 8(a1)		; ZVE32F-NEXT: sd a2, 8(a1)
; ZVE32F-NEXT: sd a0, 0(a1)		; ZVE32F-NEXT: sd a0, 0(a1)
Show All 35 Lines

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

	Show First 20 Lines • Show All 286 Lines • ▼ Show 20 Lines
	%retval = call {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %vec)			%retval = call {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %vec)
	ret {<4 x i32>, <4 x i32>} %retval			ret {<4 x i32>, <4 x i32>} %retval
	}			}

	define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {			define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
	; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:			; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma			; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
	; CHECK-NEXT: vslidedown.vi v12, v8, 2			; CHECK-NEXT: vslidedown.vi v10, v8, 2
	; CHECK-NEXT: li a0, 2			; CHECK-NEXT: li a0, 2
	; CHECK-NEXT: vmv.s.x v0, a0			; CHECK-NEXT: vmv.s.x v0, a0
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; CHECK-NEXT: vrgather.vi v10, v8, 0
	; CHECK-NEXT: vrgather.vi v10, v12, 0, v0.t
	; CHECK-NEXT: vrgather.vi v9, v8, 1			; CHECK-NEXT: vrgather.vi v9, v8, 1
	; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t			; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
	; CHECK-NEXT: vmv.v.v v8, v10			; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
				; CHECK-NEXT: vslideup.vi v8, v10, 1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec)			%retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec)
	ret {<2 x i64>, <2 x i64>} %retval			ret {<2 x i64>, <2 x i64>} %retval
	}			}

	declare {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1>)			declare {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1>)
	declare {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8>)			declare {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8>)
	declare {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16>)			declare {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16>)
	▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
	%retval = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)			%retval = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)
	ret {<4 x float>, <4 x float>} %retval			ret {<4 x float>, <4 x float>} %retval
	}			}

	define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {			define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {
	; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:			; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma			; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
	; CHECK-NEXT: vslidedown.vi v12, v8, 2			; CHECK-NEXT: vslidedown.vi v10, v8, 2
	; CHECK-NEXT: li a0, 2			; CHECK-NEXT: li a0, 2
	; CHECK-NEXT: vmv.s.x v0, a0			; CHECK-NEXT: vmv.s.x v0, a0
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
	; CHECK-NEXT: vrgather.vi v10, v8, 0
	; CHECK-NEXT: vrgather.vi v10, v12, 0, v0.t
	; CHECK-NEXT: vrgather.vi v9, v8, 1			; CHECK-NEXT: vrgather.vi v9, v8, 1
	; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t			; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
	; CHECK-NEXT: vmv.v.v v8, v10			; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
				; CHECK-NEXT: vslideup.vi v8, v10, 1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec)			%retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec)
	ret {<2 x double>, <2 x double>} %retval			ret {<2 x double>, <2 x double>} %retval
	}			}

	declare {<2 x half>,<2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)			declare {<2 x half>,<2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
	declare {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half>)			declare {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half>)
	declare {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float>)			declare {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float>)
	declare {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half>)			declare {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half>)
	declare {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float>)			declare {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float>)
	declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double>)			declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double>)

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 508055

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 508055

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

[RISCV] Lower insert subvector shuffles as vslideups
ClosedPublic