Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3731,20 +3731,6 @@
   MVT XLenVT = Subtarget.getXLenVT();
   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
   auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
-  if (Index == 1 && NumSubElts + Index == (int)NumElts &&
-      isa<BuildVectorSDNode>(InPlace)) {
-    if (SDValue Splat = cast<BuildVectorSDNode>(InPlace)->getSplatValue()) {
-      auto OpCode =
-          VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL;
-      auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
-                             DAG.getUNDEF(ContainerVT),
-                             convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget),
-                             Splat, TrueMask,
-                             DAG.getConstant(NumSubElts + Index, DL, XLenVT));
-      return convertFromScalableVector(VT, Vec, DAG, Subtarget);
-    }
-  }
-
   // We slide up by the index that the subvector is being inserted at, and set
   // VL to the index + the number of elements being inserted.
   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
@@ -3762,6 +3748,64 @@
   return convertFromScalableVector(VT, Slideup, DAG, Subtarget);
 }
 
+/// Match v(f)slide1up/down idioms. These operations involve sliding
+/// N-1 elements to make room for an inserted scalar at one end.
+static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            ArrayRef<int> Mask,
+                                            const RISCVSubtarget &Subtarget,
+                                            SelectionDAG &DAG) {
+  // Don't try to reason about length-changing shuffles.
+  if (EVT(VT) != V1.getValueType())
+    return SDValue();
+
+  bool OpsSwapped = false;
+  if (!isa<BuildVectorSDNode>(V1)) {
+    if (!isa<BuildVectorSDNode>(V2))
+      return SDValue();
+    std::swap(V1, V2);
+    OpsSwapped = true;
+  }
+  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
+  if (!Splat)
+    return SDValue();
+
+  // Return true if the mask could describe a slide of Mask.size() - 1
+  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
+  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
+    const unsigned S = (Offset > 0) ? 0 : -Offset;
+    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
+    for (unsigned i = S; i != E; ++i)
+      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
+        return false;
+    return true;
+  };
+
+  const unsigned NumElts = VT.getVectorNumElements();
+  bool IsVSlideDown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
+  if (!IsVSlideDown &&
+      !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
+    return SDValue();
+
+  const int InsertIdx = Mask[IsVSlideDown ? (NumElts - 1) : 0];
+  // The inserted lane must come from the splat; an undef scalar is legal but
+  // not profitable.
+  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
+    return SDValue();
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
+  auto OpCode = IsVSlideDown ?
+    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
+    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
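+  // V2 is the other shuffle operand after the swap above: it provides the
+  // lanes being slid, while Splat supplies the scalar that fills the vacated
+  // lane (lane NumElts - 1 for slide1down, lane 0 for slide1up). VL is
+  // NumElts, so every lane of the result is written.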
+  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
+                         DAG.getUNDEF(ContainerVT),
+                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
+                         Splat, TrueMask,
+                         DAG.getConstant(NumElts, DL, XLenVT));
+  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+}
+
 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
 // to create an interleaved vector of <[vscale x] n*2 x ty>.
 // This requires that the size of ty is less than the subtarget's maximum ELEN.
@@ -3936,6 +3980,10 @@
   ArrayRef<int> Mask = SVN->getMask();
 
+  if (SDValue V =
+          lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
+    return V;
+
   if (SDValue V =
           lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
     return V;
 
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
@@ -8,9 +8,7 @@
 ; CHECK-LABEL: vslide1down_2xi8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %vb = insertelement <2 x i8> poison, i8 %b, i64 0
   %v1 = shufflevector <2 x i8> %v, <2 x i8> %vb, <2 x i32> <i32 1, i32 2>
@@ -30,9 +28,7 @@
 ; RV64-LABEL: vslide1down_4xi8:
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-NEXT: ret
   %vb = insertelement <4 x i8> poison, i8 %b, i64 0
   %v1 = shufflevector <4 x i8> %v, <4 x i8> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -52,9 +48,7 @@
 ; RV64-LABEL: vslide1down_4xi8_swapped:
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-NEXT: ret
   %vb = insertelement <4 x i8> poison, i8 %b, i64 0
   %v1 = shufflevector <4 x i8> %vb, <4 x i8> %v, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
@@ -74,9 +68,7 @@
 ; RV64-LABEL: vslide1down_2xi16:
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-NEXT: ret
   %vb = insertelement <2 x i16> poison, i16 %b, i64 0
   %v1 = shufflevector <2 x i16> %v, <2 x i16> %vb, <2 x i32> <i32 1, i32 2>
@@ -87,9 +79,7 @@
 ; RV32-LABEL: vslide1down_4xi16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vslideup.vi v8, v9, 3
+; RV32-NEXT: vslide1down.vx v8, v8, a0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vslide1down_4xi16:
@@ -109,9 +99,7 @@
 ; RV32-LABEL: vslide1down_2xi32:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vslideup.vi v8, v9, 1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vslide1down_2xi32:
@@ -131,9 +119,7 @@
 ; CHECK-LABEL: vslide1down_4xi32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x i32> poison, i32 %b, i64 0
   %v1 = shufflevector <4 x i32> %v, <4 x i32> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -158,9 +144,7 @@
 ; RV64-LABEL: vslide1down_2xi64:
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-NEXT: ret
   %vb = insertelement <2 x i64> poison, i64 %b, i64 0
   %v1 = shufflevector <2 x i64> %v, <2 x i64> %vb, <2 x i32> <i32 1, i32 2>
@@ -185,9 +169,7 @@
 ; RV64-LABEL: vslide1down_4xi64:
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v10, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-NEXT: ret
   %vb = insertelement <4 x i64> poison, i64 %b, i64 0
   %v1 = shufflevector <4 x i64> %v, <4 x i64> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -198,9 +180,7 @@
 ; CHECK-LABEL: vslide1down_2xf16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <2 x half> poison, half %b, i64 0
   %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2>
@@ -211,9 +191,7 @@
 ; CHECK-LABEL: vslide1down_4xf16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x half> poison, half %b, i64 0
   %v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -224,9 +202,7 @@
 ; CHECK-LABEL: vslide1down_2xf32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <2 x float> poison, float %b, i64 0
   %v1 = shufflevector <2 x float> %v, <2 x float> %vb, <2 x i32> <i32 1, i32 2>
@@ -237,9 +213,7 @@
 ; CHECK-LABEL: vslide1down_4xf32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x float> poison, float %b, i64 0
   %v1 = shufflevector <4 x float> %v, <4 x float> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -250,9 +224,7 @@
 ; CHECK-LABEL: vslide1down_2xf64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <2 x double> poison, double %b, i64 0
   %v1 = shufflevector <2 x double> %v, <2 x double> %vb, <2 x i32> <i32 1, i32 2>
@@ -263,9 +235,7 @@
 ; CHECK-LABEL: vslide1down_4xf64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x double> poison, double %b, i64 0
   %v1 = shufflevector <4 x double> %v, <4 x double> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -275,14 +245,8 @@
 define <4 x i8> @vslide1down_4xi8_with_splat(<4 x i8> %v, i8 %b) {
 ; CHECK-LABEL: vslide1down_4xi8_with_splat:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 7
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 1
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
-; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x i8> poison, i8 %b, i64 0
   %v1 = shufflevector <4 x i8> %vb, <4 x i8> poison, <4 x i32> zeroinitializer
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
@@ -261,13 +261,8 @@
 define <4 x i8> @vslide1up_4xi8_with_splat(<4 x i8> %v, i8 %b) {
 ; CHECK-LABEL: vslide1up_4xi8_with_splat:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 14
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, -1
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %vb = insertelement <4 x i8> poison, i8 %b, i64 0
@@ -414,3 +409,18 @@
   %v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32>
   ret <4 x i8> %v2
 }
+
+define <2 x i8> @vslide1up_4xi8_neg_length_changing(<4 x i8> %v, i8 %b) {
+; CHECK-LABEL: vslide1up_4xi8_neg_length_changing:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %v1 = insertelement <4 x i8> %v, i8 %b, i64 0
+  %v2 = shufflevector <4 x i8> %v1, <4 x i8> %v, <2 x i32> <i32 0, i32 4>
+  ret <2 x i8> %v2
+}