Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12467,7 +12467,37 @@
         return Src.getOperand(0);
       // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
     }
-    break;
+    [[fallthrough]];
+  }
+  case RISCVISD::VMV_S_X_VL: {
+    EVT VT = N->getValueType(0);
+    SDLoc DL(N);
+    SDValue Slide = N->getOperand(0);
+    SDValue VL = N->getOperand(2);
+
+    if (Slide.getOpcode() != RISCVISD::VSLIDEUP_VL ||
+        VL != Slide->getOperand(4) || !isNonZeroAVL(VL))
+      return SDValue();
+
+    if (Slide->getValueType(0).getVectorElementType().getSizeInBits() !=
+        VT.getVectorElementType().getSizeInBits())
+      return SDValue();
+
+    // Note: Only actually need lane 0 of the mask to be active here, but
+    // checking for a true mask is easier.
+    SDValue SlideMask = Slide->getOperand(3);
+    if (SlideMask.getOpcode() != RISCVISD::VMSET_VL ||
+        SlideMask.getOperand(0) != VL)
+      return SDValue();
+
+    auto *ImmOff = dyn_cast<ConstantSDNode>(Slide->getOperand(2));
+    if (!ImmOff || 1 != ImmOff->getZExtValue())
+      return SDValue();
+
+    auto OpCode =
+        (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+    return DAG.getNode(OpCode, DL, VT, Slide->getOperand(0),
+                       Slide->getOperand(1), N->getOperand(1), SlideMask, VL);
   }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = N->getConstantOperandVal(0);
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
@@ -280,7 +280,7 @@
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
-  %v1 = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32>
+  %v1 = shufflevector <2 x double> %v, <2 x double> poison, <2 x i32>
   %v2 = insertelement <2 x double> %v1, double %b, i64 0
   ret <2 x double> %v2
 }
@@ -289,9 +289,7 @@
 ; CHECK-LABEL: vslide1up_4xi8_inverted:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %v1 = shufflevector <4 x i8> %v, <4 x i8> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
@@ -299,6 +297,77 @@
   ret <4 x i8> %v2
 }
 
+define <4 x i16> @vslide1up_4xi16_inverted(<4 x i16> %v, i16 %b) {
+; CHECK-LABEL: vslide1up_4xi16_inverted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x i16> %v1, i16 %b, i64 0
+  ret <4 x i16> %v2
+}
+
+define <4 x i32> @vslide1up_4xi32_inverted(<4 x i32> %v, i32 %b) {
+; CHECK-LABEL: vslide1up_4xi32_inverted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1up.vx v9, v8, a0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x i32> %v1, i32 %b, i64 0
+  ret <4 x i32> %v2
+}
+
+define <4 x i64> @vslide1up_4xi64_inverted(<4 x i64> %v, i64 %b) {
+; RV32-LABEL: vslide1up_4xi64_inverted:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vslideup.vi v10, v8, 1
+; RV32-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vmv2r.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vslide1up_4xi64_inverted:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vslide1up.vx v10, v8, a0
+; RV64-NEXT:    vmv.v.v v8, v10
+; RV64-NEXT:    ret
+  %v1 = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x i64> %v1, i64 %b, i64 0
+  ret <4 x i64> %v2
+}
+
+define <4 x float> @vslide1up_4xf32_inverted(<4 x float> %v, float %b) {
+; CHECK-LABEL: vslide1up_4xf32_inverted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <4 x float> %v, <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x float> %v1, float %b, i64 0
+  ret <4 x float> %v2
+}
+
+define <4 x double> @vslide1up_4xf64_inverted(<4 x double> %v, double %b) {
+; CHECK-LABEL: vslide1up_4xf64_inverted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v1 = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
+  %v2 = insertelement <4 x double> %v1, double %b, i64 0
+  ret <4 x double> %v2
+}
+
+
 define <2 x double> @vslide1up_2xf64_as_rotate(<2 x double> %v, double %b) {
 ; CHECK-LABEL: vslide1up_2xf64_as_rotate:
 ; CHECK:       # %bb.0:
@@ -373,8 +442,8 @@
 define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert(<4 x i8> %v, i8 %b) {
 ; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI23_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI23_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI28_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI28_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10
@@ -399,8 +468,8 @@
 define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
 ; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI25_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI30_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10
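
For illustration only (not part of the patch, and not taken from the upstream test file): the sketch below shows why the combine above insists on a slide amount of exactly one via the ImmOff == 1 guard. The function name @slideup2_then_insert is hypothetical; if such a shuffle lowers to a vslideup by two followed by a vmv.s.x, folding it into a single vslide1up.vx would be incorrect, since lane 2 of the result must still hold the old lane 0 value.

; Hypothetical LLVM IR example: slide up by two, then insert into lane 0.
; The result must be [b, any, v0, v1]; a vslide1up.vx would instead produce
; [b, v0, v1, v2], which differs in lane 2, so the ImmOff == 1 check must
; reject this shape.
define <4 x i8> @slideup2_then_insert(<4 x i8> %v, i8 %b) {
  %v1 = shufflevector <4 x i8> %v, <4 x i8> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
  %v2 = insertelement <4 x i8> %v1, i8 %b, i64 0
  ret <4 x i8> %v2
}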