This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/RISCV/
-
Target/
-
RISCV/
2
RISCVISelLowering.cpp
-
test/CodeGen/RISCV/rvv/
-
CodeGen/
-
RISCV/
-
rvv/
-
fixed-vector-shuffle-vslide1up.ll

Differential D151823

[RISCV] Combine vmv.s.x (vslideup pt, v, 1, <all-ones>, vl), scalar, vl to vslide1up
Needs ReviewPublic

Authored by reames on May 31 2023, 11:23 AM.

Download Raw Diff

Details

Reviewers

craig.topper
• l.frisken
asb
frasercrmck

Summary

This covers the case where we insert a scalar into lane 0 of a vector after performing the slideup. This was triggered by offline discussion about D151736, and is an alternative to that approach. I think this is cleaner and potentially more general.

A couple of things we have to be careful of:

The type sizes need to match. We implicitly truncate the scalar, and the shift is in SEW units.
We need to avoid VL=0 and a mask with lane-0 disabled. The former is just tricky to reason about; the latter differs in behavior between the two forms.

Diff Detail

Event Timeline

reames created this revision.May 31 2023, 11:23 AM

Herald added a project: Restricted Project. · View Herald TranscriptMay 31 2023, 11:23 AM

Herald added subscribers: jobnoorman, luke, VincentWu and 28 others. · View Herald Transcript

reames requested review of this revision.May 31 2023, 11:23 AM

Herald added a project: Restricted Project. · View Herald TranscriptMay 31 2023, 11:23 AM

Herald added subscribers: • pcwang-thead, eopXD, MaskRay. · View Herald Transcript

Harbormaster completed remote builds in B235631: Diff 527136.May 31 2023, 12:45 PM

luke added inline comments.May 31 2023, 5:35 PM

llvm/lib/Target/RISCV/RISCVISelLowering.cpp
12480	It's not documented in `RISCVISelLowering.h` but I believe `RISCVISD::VSLIDEUP_VL` takes a policy operand (for both the mask and tail). It doesn't look like `RISCVISD::VSLIDE1UP_VL` has it though, so I'm not sure what policy it uses. Either way do we need to check the vslideup's policy here?
12482	Nit, could possibly use `Slide.getScalarValueSizeInBits() != VT.getScalarSizeInBits()`

Revision Contents

Path

Size

llvm/

lib/

Target/

RISCV/

RISCVISelLowering.cpp

32 lines

test/

CodeGen/

RISCV/

rvv/

fixed-vector-shuffle-vslide1up.ll

85 lines

Diff 527136

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,461 Lines • ▼ Show 20 Lines	if (N->getOperand(0).isUndef() &&
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
EVT SrcVT = Src.getOperand(0).getValueType();		EVT SrcVT = Src.getOperand(0).getValueType();
assert(SrcVT.getVectorElementType() == VT.getVectorElementType());		assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
// Widths match, just return the original vector.		// Widths match, just return the original vector.
if (SrcVT == VT)		if (SrcVT == VT)
return Src.getOperand(0);		return Src.getOperand(0);
// TODO: Use insert_subvector/extract_subvector to change widen/narrow?		// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
}		}
break;		[[fallthrough]];
		}
		case RISCVISD::VMV_S_X_VL: {
		EVT VT = N->getValueType(0);
		SDLoc DL(N);
		SDValue Slide = N->getOperand(0);
		SDValue VL = N->getOperand(2);

		if (Slide.getOpcode() != RISCVISD::VSLIDEUP_VL \|\|
		VL != Slide->getOperand(4) \|\| !isNonZeroAVL(VL))
		return SDValue();
		lukeUnsubmitted Not Done Reply Inline Actions It's not documented in `RISCVISelLowering.h` but I believe `RISCVISD::VSLIDEUP_VL` takes a policy operand (for both the mask and tail). It doesn't look like `RISCVISD::VSLIDE1UP_VL` has it though, so I'm not sure what policy it uses. Either way do we need to check the vslideup's policy here? luke: It's not documented in `RISCVISelLowering.h` but I believe `RISCVISD::VSLIDEUP_VL` takes a…

		if (Slide->getValueType(0).getVectorElementType().getSizeInBits() !=
		lukeUnsubmitted Not Done Reply Inline Actions Nit, could possibly use `Slide.getScalarValueSizeInBits() != VT.getScalarSizeInBits()` luke: Nit, could possibly use `Slide.getScalarValueSizeInBits() != VT.getScalarSizeInBits()`
		VT.getVectorElementType().getSizeInBits())
		return SDValue();

		// Note: Only actually need lane 0 of the mask to be active here, but
		// checking for a true mask is easier.
		SDValue SlideMask = Slide->getOperand(3);
		if (SlideMask.getOpcode() != RISCVISD::VMSET_VL \|\|
		SlideMask.getOperand(0) != VL)
		return SDValue();

		auto *ImmOff = dyn_cast<ConstantSDNode>(Slide->getOperand(2));
		if (!ImmOff \|\| 1 != ImmOff->getZExtValue())
		return SDValue();

		auto OpCode =
		(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
		return DAG.getNode(OpCode, DL, VT, Slide->getOperand(0),
		Slide->getOperand(1), N->getOperand(1), SlideMask, VL);
}		}
case ISD::INTRINSIC_WO_CHAIN: {		case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = N->getConstantOperandVal(0);		unsigned IntNo = N->getConstantOperandVal(0);
switch (IntNo) {		switch (IntNo) {
// By default we do not combine any intrinsic.		// By default we do not combine any intrinsic.
default:		default:
return SDValue();		return SDValue();
case Intrinsic::riscv_vcpop:		case Intrinsic::riscv_vcpop:
▲ Show 20 Lines • Show All 4,087 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll

	Show First 20 Lines • Show All 274 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: vslide1up_v2f64_inverted:			; CHECK-LABEL: vslide1up_v2f64_inverted:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
	; CHECK-NEXT: vrgather.vi v9, v8, 0			; CHECK-NEXT: vrgather.vi v9, v8, 0
	; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma			; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
	; CHECK-NEXT: vfmv.s.f v9, fa0			; CHECK-NEXT: vfmv.s.f v9, fa0
	; CHECK-NEXT: vmv1r.v v8, v9			; CHECK-NEXT: vmv1r.v v8, v9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32> <i32 0, i32 0>			%v1 = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32> <i32 undef, i32 0>
	%v2 = insertelement <2 x double> %v1, double %b, i64 0			%v2 = insertelement <2 x double> %v1, double %b, i64 0
	ret <2 x double> %v2			ret <2 x double> %v2
	}			}

	define <4 x i8> @vslide1up_4xi8_inverted(<4 x i8> %v, i8 %b) {			define <4 x i8> @vslide1up_4xi8_inverted(<4 x i8> %v, i8 %b) {
	; CHECK-LABEL: vslide1up_4xi8_inverted:			; CHECK-LABEL: vslide1up_4xi8_inverted:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma			; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
	; CHECK-NEXT: vslideup.vi v9, v8, 1			; CHECK-NEXT: vslide1up.vx v9, v8, a0
	; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
	; CHECK-NEXT: vmv.s.x v9, a0
	; CHECK-NEXT: vmv1r.v v8, v9			; CHECK-NEXT: vmv1r.v v8, v9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = shufflevector <4 x i8> %v, <4 x i8> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>			%v1 = shufflevector <4 x i8> %v, <4 x i8> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
	%v2 = insertelement <4 x i8> %v1, i8 %b, i64 0			%v2 = insertelement <4 x i8> %v1, i8 %b, i64 0
	ret <4 x i8> %v2			ret <4 x i8> %v2
	}			}

				define <4 x i16> @vslide1up_4xi16_inverted(<4 x i16> %v, i16 %b) {
				; CHECK-LABEL: vslide1up_4xi16_inverted:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
				; CHECK-NEXT: vslide1up.vx v9, v8, a0
				; CHECK-NEXT: vmv1r.v v8, v9
				; CHECK-NEXT: ret
				%v1 = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
				%v2 = insertelement <4 x i16> %v1, i16 %b, i64 0
				ret <4 x i16> %v2
				}

				define <4 x i32> @vslide1up_4xi32_inverted(<4 x i32> %v, i32 %b) {
				; CHECK-LABEL: vslide1up_4xi32_inverted:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
				; CHECK-NEXT: vslide1up.vx v9, v8, a0
				; CHECK-NEXT: vmv.v.v v8, v9
				; CHECK-NEXT: ret
				%v1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
				%v2 = insertelement <4 x i32> %v1, i32 %b, i64 0
				ret <4 x i32> %v2
				}

				define <4 x i64> @vslide1up_4xi64_inverted(<4 x i64> %v, i64 %b) {
				; RV32-LABEL: vslide1up_4xi64_inverted:
				; RV32: # %bb.0:
				; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
				; RV32-NEXT: vslideup.vi v10, v8, 1
				; RV32-NEXT: vsetivli zero, 2, e32, m2, tu, ma
				; RV32-NEXT: vslide1down.vx v10, v10, a0
				; RV32-NEXT: vslide1down.vx v10, v10, a1
				; RV32-NEXT: vmv2r.v v8, v10
				; RV32-NEXT: ret
				;
				; RV64-LABEL: vslide1up_4xi64_inverted:
				; RV64: # %bb.0:
				; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
				; RV64-NEXT: vslide1up.vx v10, v8, a0
				; RV64-NEXT: vmv.v.v v8, v10
				; RV64-NEXT: ret
				%v1 = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
				%v2 = insertelement <4 x i64> %v1, i64 %b, i64 0
				ret <4 x i64> %v2
				}

				define <4 x float> @vslide1up_4xf32_inverted(<4 x float> %v, float %b) {
				; CHECK-LABEL: vslide1up_4xf32_inverted:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
				; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
				; CHECK-NEXT: vmv.v.v v8, v9
				; CHECK-NEXT: ret
				%v1 = shufflevector <4 x float> %v, <4 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
				%v2 = insertelement <4 x float> %v1, float %b, i64 0
				ret <4 x float> %v2
				}

				define <4 x double> @vslide1up_4xf64_inverted(<4 x double> %v, double %b) {
				; CHECK-LABEL: vslide1up_4xf64_inverted:
				; CHECK: # %bb.0:
				; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
				; CHECK-NEXT: vfslide1up.vf v10, v8, fa0
				; CHECK-NEXT: vmv.v.v v8, v10
				; CHECK-NEXT: ret
				%v1 = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
				%v2 = insertelement <4 x double> %v1, double %b, i64 0
				ret <4 x double> %v2
				}


	define <2 x double> @vslide1up_2xf64_as_rotate(<2 x double> %v, double %b) {			define <2 x double> @vslide1up_2xf64_as_rotate(<2 x double> %v, double %b) {
	; CHECK-LABEL: vslide1up_2xf64_as_rotate:			; CHECK-LABEL: vslide1up_2xf64_as_rotate:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma			; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
	; CHECK-NEXT: vfmv.s.f v9, fa0			; CHECK-NEXT: vfmv.s.f v9, fa0
	; CHECK-NEXT: vslideup.vi v8, v9, 1			; CHECK-NEXT: vslideup.vi v8, v9, 1
	; CHECK-NEXT: vslidedown.vi v9, v8, 1			; CHECK-NEXT: vslidedown.vi v9, v8, 1
	; CHECK-NEXT: vslideup.vi v9, v8, 1			; CHECK-NEXT: vslideup.vi v9, v8, 1
	▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 0, i32 4, i32 5, i32 6>			%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
	ret <4 x i8> %v2			ret <4 x i8> %v2
	}			}

	define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert(<4 x i8> %v, i8 %b) {			define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert(<4 x i8> %v, i8 %b) {
	; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert:			; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: lui a0, %hi(.LCPI23_0)			; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
	; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0)			; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
	; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma			; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
	; CHECK-NEXT: vle8.v v10, (a0)			; CHECK-NEXT: vle8.v v10, (a0)
	; CHECK-NEXT: vrgather.vv v9, v8, v10			; CHECK-NEXT: vrgather.vv v9, v8, v10
	; CHECK-NEXT: vmv1r.v v8, v9			; CHECK-NEXT: vmv1r.v v8, v9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 4, i32 4, i32 5, i32 6>			%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 4, i32 4, i32 5, i32 6>
	ret <4 x i8> %v2			ret <4 x i8> %v2
	}			}

	define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) {			define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) {
	; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert2:			; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert2:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma			; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
	; CHECK-NEXT: vslidedown.vi v9, v8, 3			; CHECK-NEXT: vslidedown.vi v9, v8, 3
	; CHECK-NEXT: vslideup.vi v9, v8, 1			; CHECK-NEXT: vslideup.vi v9, v8, 1
	; CHECK-NEXT: vmv1r.v v8, v9			; CHECK-NEXT: vmv1r.v v8, v9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 7, i32 4, i32 5, i32 6>			%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 7, i32 4, i32 5, i32 6>
	ret <4 x i8> %v2			ret <4 x i8> %v2
	}			}

	define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {			define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
	; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:			; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: lui a0, %hi(.LCPI25_0)			; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
	; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)			; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
	; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma			; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
	; CHECK-NEXT: vle8.v v10, (a0)			; CHECK-NEXT: vle8.v v10, (a0)
	; CHECK-NEXT: vrgather.vv v9, v8, v10			; CHECK-NEXT: vrgather.vv v9, v8, v10
	; CHECK-NEXT: vmv1r.v v8, v9			; CHECK-NEXT: vmv1r.v v8, v9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>			%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>
	ret <4 x i8> %v2			ret <4 x i8> %v2
	}			}
	Show All 15 Lines