diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1803,6 +1803,7 @@
   int64_t StepNumerator;
   unsigned StepDenominator;
   int64_t Addend;
+  int InconsistentIdx; // The inconsistent element index, -1 means none.
 };
 
 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
@@ -1815,7 +1816,11 @@
 // Note that this method will also match potentially unappealing index
 // sequences, like , however it is left to the caller to
 // determine whether this is worth generating code for.
-static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+// With the given IgnoredIdx >= 0, this function will ignore that index during
+// the check. It allows us to identify almost-VID sequences with a single
+// inconsistent element.
+static Optional<VIDSequence> isSimpleVIDSequenceImpl(SDValue Op,
+                                                     int IgnoredIdx = -1) {
   unsigned NumElts = Op.getNumOperands();
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
   if (!Op.getValueType().isInteger())
@@ -1828,7 +1833,8 @@
   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
     // Assume undef elements match the sequence; we just have to be careful
     // when interpolating across them.
-    if (Op.getOperand(Idx).isUndef())
+    if (Op.getOperand(Idx).isUndef() ||
+        (IgnoredIdx >= 0 && Idx == unsigned(IgnoredIdx)))
       continue;
     // The BUILD_VECTOR must be all constants.
     if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
@@ -1882,7 +1888,8 @@
   // Loop back through the sequence and validate elements we might have skipped
   // while waiting for a valid step. While doing this, log any sequence addend.
   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
-    if (Op.getOperand(Idx).isUndef())
+    if (Op.getOperand(Idx).isUndef() ||
+        (IgnoredIdx >= 0 && Idx == unsigned(IgnoredIdx)))
       continue;
     uint64_t Val = Op.getConstantOperandVal(Idx) &
                    maskTrailingOnes<uint64_t>(EltSizeInBits);
@@ -1897,7 +1904,16 @@
 
   assert(SeqAddend && "Must have an addend if we have a step");
 
-  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
+  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend, IgnoredIdx};
+}
+
+static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+  if (auto VS = isSimpleVIDSequenceImpl(Op))
+    return VS;
+  for (size_t i = 0; i < Op.getNumOperands(); i++)
+    if (auto VS = isSimpleVIDSequenceImpl(Op, i))
+      return VS;
+  return None;
 }
 
 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
@@ -2072,6 +2088,7 @@
       int64_t StepNumerator = SimpleVID->StepNumerator;
       unsigned StepDenominator = SimpleVID->StepDenominator;
       int64_t Addend = SimpleVID->Addend;
+      int InconsistentIdx = SimpleVID->InconsistentIdx;
 
       assert(StepNumerator != 0 && "Invalid step");
       bool Negate = false;
@@ -2115,6 +2132,11 @@
             VT, DL, DAG.getConstant(Addend, DL, XLenVT));
         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
       }
+      if (InconsistentIdx >= 0) {
+        VID = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, VID,
+                          Op.getOperand(InconsistentIdx),
+                          DAG.getConstant(InconsistentIdx, DL, XLenVT));
+      }
       return VID;
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -133,10 +133,13 @@
 define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vle16.v v14, (a0)
+; RV32-NEXT:    vid.v v12
+; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vrsub.vi v14, v12, 4
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV32-NEXT:    vmv.s.x v14, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32-NEXT:    vrgatherei16.vv v12, v8, v14
 ; RV32-NEXT:    li a0, 8
@@ -149,10 +152,14 @@
 ;
 ; RV64-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vle64.v v14, (a0)
+; RV64-NEXT:    vid.v v12
+; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vrsub.vi v14, v12, 4
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV64-NEXT:    vmv.s.x v14, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64-NEXT:    vrgather.vv v12, v8, v14
 ; RV64-NEXT:    li a0, 8
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -24,15 +24,17 @@
   ret void
 }
 
-; TODO: Could do VID then insertelement on missing elements
 define void @buildvec_notquite_vid_v16i8(<16 x i8>* %x) {
 ; CHECK-LABEL: buildvec_notquite_vid_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
+; CHECK-NEXT:    li a1, 3
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vsetivli zero, 3, e8, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v9, v8, 2
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vse8.v v9, (a0)
 ; CHECK-NEXT:    ret
   store <16 x i8> , <16 x i8>* %x
   ret void
@@ -191,15 +193,15 @@
 define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
 ; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a0, 1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.s.x v9, a0
-; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v8, v9, 2
+; RV32-NEXT:    vmv.s.x v9, zero
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV32-NEXT:    vslideup.vi v8, v9, 3
 ; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
 ; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV32-NEXT:    vle32.v v9, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -240,14 +242,22 @@
 define void @buildvec_no_vid_v4i8(<4 x i8>* %z0, <4 x i8>* %z1, <4 x i8>* %z2, <4 x i8>* %z3, <4 x i8>* %z4, <4 x i8>* %z5) {
 ; RV32-LABEL: buildvec_no_vid_v4i8:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a6, %hi(.LCPI14_0)
-; RV32-NEXT:    addi a6, a6, %lo(.LCPI14_0)
+; RV32-NEXT:    li a6, 6
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV32-NEXT:    vmv.s.x v8, a6
+; RV32-NEXT:    vid.v v9
+; RV32-NEXT:    vadd.vv v9, v9, v9
+; RV32-NEXT:    vadd.vi v9, v9, 1
+; RV32-NEXT:    vsetivli zero, 3, e8, mf4, tu, mu
+; RV32-NEXT:    vmv1r.v v10, v9
+; RV32-NEXT:    vslideup.vi v10, v8, 2
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV32-NEXT:    vse8.v v10, (a0)
+; RV32-NEXT:    li a0, 2
+; RV32-NEXT:    vmv.s.x v8, a0
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, tu, mu
+; RV32-NEXT:    vslideup.vi v9, v8, 1
 ; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; RV32-NEXT:    vle8.v v8, (a6)
-; RV32-NEXT:    lui a6, %hi(.LCPI14_1)
-; RV32-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; RV32-NEXT:    vle8.v v9, (a6)
-; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    vse8.v v9, (a1)
 ; RV32-NEXT:    lui a0, 1
 ; RV32-NEXT:    addi a0, a0, -2048
@@ -259,8 +269,8 @@
 ; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; RV32-NEXT:    vmv.v.x v8, a0
 ; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; RV32-NEXT:    lui a0, %hi(.LCPI14_2)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI14_2)
+; RV32-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI14_0)
 ; RV32-NEXT:    vle8.v v9, (a0)
 ; RV32-NEXT:    vse8.v v8, (a3)
 ; RV32-NEXT:    vmv.v.i v8, -2
@@ -270,14 +280,22 @@
 ;
 ; RV64-LABEL: buildvec_no_vid_v4i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a6, %hi(.LCPI14_0)
-; RV64-NEXT:    addi a6, a6, %lo(.LCPI14_0)
+; RV64-NEXT:    li a6, 6
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV64-NEXT:    vmv.s.x v8, a6
+; RV64-NEXT:    vid.v v9
+; RV64-NEXT:    vadd.vv v9, v9, v9
+; RV64-NEXT:    vadd.vi v9, v9, 1
+; RV64-NEXT:    vsetivli zero, 3, e8, mf4, tu, mu
+; RV64-NEXT:    vmv1r.v v10, v9
+; RV64-NEXT:    vslideup.vi v10, v8, 2
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV64-NEXT:    vse8.v v10, (a0)
+; RV64-NEXT:    li a0, 2
+; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, tu, mu
+; RV64-NEXT:    vslideup.vi v9, v8, 1
 ; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; RV64-NEXT:    vle8.v v8, (a6)
-; RV64-NEXT:    lui a6, %hi(.LCPI14_1)
-; RV64-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; RV64-NEXT:    vle8.v v9, (a6)
-; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    vse8.v v9, (a1)
 ; RV64-NEXT:    lui a0, 1
 ; RV64-NEXT:    addiw a0, a0, -2048
@@ -289,8 +307,8 @@
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
 ; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; RV64-NEXT:    lui a0, %hi(.LCPI14_2)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI14_2)
+; RV64-NEXT:    lui a0, %hi(.LCPI14_0)
+; RV64-NEXT:    addi a0, a0, %lo(.LCPI14_0)
 ; RV64-NEXT:    vle8.v v9, (a0)
 ; RV64-NEXT:    vse8.v v8, (a3)
 ; RV64-NEXT:    vmv.v.i v8, -2
@@ -537,53 +555,27 @@
 }
 
 define void @buildvec_vid_step1o2_v4i32(<4 x i32>* %z0, <4 x i32>* %z1, <4 x i32>* %z2, <4 x i32>* %z3, <4 x i32>* %z4, <4 x i32>* %z5, <4 x i32>* %z6) {
-; RV32-LABEL: buildvec_vid_step1o2_v4i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vsrl.vi v8, v8, 1
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    vse32.v v8, (a1)
-; RV32-NEXT:    vmv.v.i v9, 1
-; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    vse32.v v8, (a4)
-; RV32-NEXT:    vmv.s.x v8, zero
-; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v9, v8, 1
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vse32.v v9, (a5)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vmv.s.x v8, a0
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v9, v8, 3
-; RV32-NEXT:    vse32.v v9, (a6)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_vid_step1o2_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    vsrl.vi v8, v8, 1
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    vmv.v.i v9, 1
-; RV64-NEXT:    vse32.v v8, (a1)
-; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    vse32.v v8, (a4)
-; RV64-NEXT:    vmv.s.x v8, zero
-; RV64-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
-; RV64-NEXT:    vslideup.vi v9, v8, 1
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    vse32.v v9, (a5)
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vmv.s.x v8, a0
-; RV64-NEXT:    vmv.v.i v9, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
-; RV64-NEXT:    vslideup.vi v9, v8, 3
-; RV64-NEXT:    vse32.v v9, (a6)
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_vid_step1o2_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsrl.vi v8, v8, 1
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a2)
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    vse32.v v8, (a4)
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a5)
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    vse32.v v8, (a6)
+; CHECK-NEXT:    ret
   store <4 x i32> , <4 x i32>* %z0
   store <4 x i32> , <4 x i32>* %z1
   store <4 x i32> , <4 x i32>* %z2
@@ -609,17 +601,14 @@
 ; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    vse16.v v8, (a4)
-; CHECK-NEXT:    li a0, 3
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vmv.v.i v10, 4
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v10, v8, 1
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vse16.v v10, (a5)
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vse16.v v8, (a5)
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v9, v8, 3
+; CHECK-NEXT:    vslideup.vi v9, v10, 3
 ; CHECK-NEXT:    vse16.v v9, (a6)
 ; CHECK-NEXT:    ret
   store <4 x i16> , <4 x i16>* %z0
@@ -727,10 +716,11 @@
 define <4 x i8> @buildvec_not_vid_v4i8_1() {
 ; CHECK-LABEL: buildvec_not_vid_v4i8_1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI37_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmv.s.x v9, zero
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   ret <4 x i8> 
 }
@@ -738,10 +728,13 @@
 define <4 x i8> @buildvec_not_vid_v4i8_2() {
 ; CHECK-LABEL: buildvec_not_vid_v4i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI38_0)
+; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vrsub.vi v8, v8, 3
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
   ret <4 x i8> 
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -85,10 +85,14 @@
 define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vadd.vv v10, v10, v10
+; CHECK-NEXT:    vrsub.vi v11, v10, 4
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmv.s.x v11, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
@@ -459,12 +463,12 @@
 define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    vmv.v.i v11, 0
-; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, mu
-; CHECK-NEXT:    vslideup.vi v11, v10, 2
+; CHECK-NEXT:    vmv.s.x v10, zero
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrsub.vi v11, v11, 6
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v11, v10, 1
 ; CHECK-NEXT:    li a0, 70
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1210,19 +1210,17 @@
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; RV32-NEXT:    vid.v v10
 ; RV32-NEXT:    vsrl.vi v10, v10, 1
-; RV32-NEXT:    vrsub.vi v10, v10, 0
+; RV32-NEXT:    vrsub.vi v11, v10, 0
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vmadd.vv v10, v8, v9
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    vmadd.vv v11, v8, v9
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v9, v8, 2
+; RV32-NEXT:    vmv.s.x v8, zero
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; RV32-NEXT:    vslideup.vi v10, v8, 3
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vsra.vv v8, v10, v9
+; RV32-NEXT:    vsra.vv v8, v11, v10
 ; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vsrl.vx v9, v10, a1
+; RV32-NEXT:    vsrl.vx v9, v11, a1
 ; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
@@ -4156,16 +4154,19 @@
 ; LMULMAX1-LABEL: mulhu_v16i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
 ; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    lui a2, %hi(.LCPI130_0)
-; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a1)
+; LMULMAX1-NEXT:    vid.v v10
+; LMULMAX1-NEXT:    vadd.vi v10, v10, 8
+; LMULMAX1-NEXT:    li a2, 7
+; LMULMAX1-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
+; LMULMAX1-NEXT:    vmv.s.x v10, a2
+; LMULMAX1-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
+; LMULMAX1-NEXT:    vdivu.vv v8, v8, v10
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    vse16.v v9, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <16 x i16>, <16 x i16>* %x
   %b = udiv <16 x i16> %a, 
@@ -4240,16 +4241,20 @@
 ; LMULMAX1-RV64-LABEL: mulhu_v8i32:
 ; LMULMAX1-RV64:       # %bb.0:
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI131_0)
-; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI131_0)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
+; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
+; LMULMAX1-RV64-NEXT:    li a2, 9
+; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
+; LMULMAX1-RV64-NEXT:    vid.v v11
+; LMULMAX1-RV64-NEXT:    vadd.vi v11, v11, 5
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; LMULMAX1-RV64-NEXT:    vslideup.vi v11, v10, 3
+; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v11
+; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
+; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
   %a = load <8 x i32>, <8 x i32>* %x
   %b = udiv <8 x i32> %a, 
@@ -4691,13 +4696,18 @@
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI136_0)
-; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI136_0)
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
+; LMULMAX1-RV32-NEXT:    vmv.v.i v10, 3
+; LMULMAX1-RV32-NEXT:    vid.v v11
+; LMULMAX1-RV32-NEXT:    li a2, -3
+; LMULMAX1-RV32-NEXT:    vmadd.vx v11, a2, v10
+; LMULMAX1-RV32-NEXT:    li a2, -1
+; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
+; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; LMULMAX1-RV32-NEXT:    vslideup.vi v11, v10, 3
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v11
+; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -754,13 +754,15 @@
 ; RV64MV-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; RV64MV-NEXT:    addi a1, sp, 32
 ; RV64MV-NEXT:    vle64.v v8, (a1)
-; RV64MV-NEXT:    lui a1, %hi(.LCPI3_3)
-; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI3_3)
-; RV64MV-NEXT:    vle64.v v10, (a1)
 ; RV64MV-NEXT:    li a1, -1
 ; RV64MV-NEXT:    srli a1, a1, 31
 ; RV64MV-NEXT:    vand.vx v8, v8, a1
-; RV64MV-NEXT:    vmsne.vv v0, v8, v10
+; RV64MV-NEXT:    vmv.s.x v10, zero
+; RV64MV-NEXT:    vid.v v12
+; RV64MV-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV64MV-NEXT:    vslideup.vi v12, v10, 3
+; RV64MV-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64MV-NEXT:    vmsne.vv v0, v8, v12
 ; RV64MV-NEXT:    vmv.v.i v8, 0
 ; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV64MV-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -547,35 +547,37 @@
 ; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32MV-NEXT:    addi a1, sp, 8
 ; RV32MV-NEXT:    vle16.v v8, (a1)
-; RV32MV-NEXT:    vmv.v.i v9, 10
-; RV32MV-NEXT:    li a1, 9
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV32MV-NEXT:    li a1, 10
 ; RV32MV-NEXT:    vmv.s.x v9, a1
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV32MV-NEXT:    vid.v v10
+; RV32MV-NEXT:    vsrl.vi v11, v10, 1
+; RV32MV-NEXT:    vadd.vi v12, v11, 9
+; RV32MV-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
+; RV32MV-NEXT:    vslideup.vi v12, v9, 1
+; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
 ; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT:    vle16.v v10, (a1)
-; RV32MV-NEXT:    vid.v v11
-; RV32MV-NEXT:    vsub.vv v8, v8, v11
-; RV32MV-NEXT:    vmul.vv v8, v8, v10
-; RV32MV-NEXT:    vadd.vv v10, v8, v8
-; RV32MV-NEXT:    vsll.vv v9, v10, v9
-; RV32MV-NEXT:    vmv.v.i v10, 0
-; RV32MV-NEXT:    li a1, 1
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV32MV-NEXT:    vmv1r.v v11, v10
-; RV32MV-NEXT:    vmv.s.x v11, a1
+; RV32MV-NEXT:    vle16.v v9, (a1)
+; RV32MV-NEXT:    vsub.vv v8, v8, v10
+; RV32MV-NEXT:    vmul.vv v8, v8, v9
+; RV32MV-NEXT:    vadd.vv v9, v8, v8
+; RV32MV-NEXT:    vsll.vv v9, v9, v12
+; RV32MV-NEXT:    vmv.s.x v10, zero
+; RV32MV-NEXT:    vrsub.vi v11, v11, 1
+; RV32MV-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
+; RV32MV-NEXT:    vslideup.vi v11, v10, 1
 ; RV32MV-NEXT:    li a1, 2047
-; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
-; RV32MV-NEXT:    vle16.v v12, (a2)
+; RV32MV-NEXT:    vle16.v v10, (a2)
 ; RV32MV-NEXT:    vsrl.vv v8, v8, v11
 ; RV32MV-NEXT:    vor.vv v8, v8, v9
 ; RV32MV-NEXT:    vand.vx v8, v8, a1
-; RV32MV-NEXT:    vmsltu.vv v0, v12, v8
-; RV32MV-NEXT:    vmerge.vim v8, v10, -1, v0
+; RV32MV-NEXT:    vmsltu.vv v0, v10, v8
+; RV32MV-NEXT:    vmv.v.i v8, 0
+; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
 ; RV32MV-NEXT:    vslidedown.vi v9, v8, 2
 ; RV32MV-NEXT:    vmv.x.s a1, v9
@@ -612,35 +614,37 @@
 ; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV64MV-NEXT:    addi a1, sp, 8
 ; RV64MV-NEXT:    vle16.v v8, (a1)
-; RV64MV-NEXT:    vmv.v.i v9, 10
-; RV64MV-NEXT:    li a1, 9
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV64MV-NEXT:    li a1, 10
 ; RV64MV-NEXT:    vmv.s.x v9, a1
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV64MV-NEXT:    vid.v v10
+; RV64MV-NEXT:    vsrl.vi v11, v10, 1
+; RV64MV-NEXT:    vadd.vi v12, v11, 9
+; RV64MV-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
+; RV64MV-NEXT:    vslideup.vi v12, v9, 1
+; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
 ; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT:    vle16.v v10, (a1)
-; RV64MV-NEXT:    vid.v v11
-; RV64MV-NEXT:    vsub.vv v8, v8, v11
-; RV64MV-NEXT:    vmul.vv v8, v8, v10
-; RV64MV-NEXT:    vadd.vv v10, v8, v8
-; RV64MV-NEXT:    vsll.vv v9, v10, v9
-; RV64MV-NEXT:    vmv.v.i v10, 0
-; RV64MV-NEXT:    li a1, 1
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV64MV-NEXT:    vmv1r.v v11, v10
-; RV64MV-NEXT:    vmv.s.x v11, a1
+; RV64MV-NEXT:    vle16.v v9, (a1)
+; RV64MV-NEXT:    vsub.vv v8, v8, v10
+; RV64MV-NEXT:    vmul.vv v8, v8, v9
+; RV64MV-NEXT:    vadd.vv v9, v8, v8
+; RV64MV-NEXT:    vsll.vv v9, v9, v12
+; RV64MV-NEXT:    vmv.s.x v10, zero
+; RV64MV-NEXT:    vrsub.vi v11, v11, 1
+; RV64MV-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
+; RV64MV-NEXT:    vslideup.vi v11, v10, 1
 ; RV64MV-NEXT:    li a1, 2047
-; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
-; RV64MV-NEXT:    vle16.v v12, (a2)
+; RV64MV-NEXT:    vle16.v v10, (a2)
 ; RV64MV-NEXT:    vsrl.vv v8, v8, v11
 ; RV64MV-NEXT:    vor.vv v8, v8, v9
 ; RV64MV-NEXT:    vand.vx v8, v8, a1
-; RV64MV-NEXT:    vmsltu.vv v0, v12, v8
-; RV64MV-NEXT:    vmerge.vim v8, v10, -1, v0
+; RV64MV-NEXT:    vmsltu.vv v0, v10, v8
+; RV64MV-NEXT:    vmv.v.i v8, 0
+; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV64MV-NEXT:    vmv.x.s a1, v8
 ; RV64MV-NEXT:    andi a1, a1, 2047
 ; RV64MV-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
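
Note (not part of the patch): the change above accepts BUILD_VECTORs that form a simple VID sequence everywhere except a single element, and patches that one element back in with an INSERT_VECTOR_ELT after materializing the sequence. The standalone C++ sketch below illustrates the matching idea on plain integers; the names (matchVID, matchAlmostVID, Seq) and the example vector are purely illustrative and intentionally omit the undef handling and fractional steps that the real isSimpleVIDSequenceImpl supports.

// Standalone sketch (not LLVM code) of the almost-VID matching idea:
// a vector matches if Elts[i] == Addend + Step * i for every index except an
// optionally ignored one.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

struct Seq {
  int64_t Step, Addend;
  int InconsistentIdx; // -1 means every element fits the sequence.
};

static std::optional<Seq> matchVID(const std::vector<int64_t> &Elts,
                                   int IgnoredIdx) {
  // Gather the elements that must fit the sequence.
  std::vector<std::pair<int64_t, int64_t>> Pts; // (index, value)
  for (size_t I = 0; I < Elts.size(); ++I)
    if (IgnoredIdx < 0 || I != size_t(IgnoredIdx))
      Pts.push_back({int64_t(I), Elts[I]});
  if (Pts.size() < 2)
    return Seq{1, Pts.empty() ? 0 : Pts[0].second, IgnoredIdx};

  // Derive Step and Addend from the first two kept elements.
  int64_t DI = Pts[1].first - Pts[0].first;
  int64_t DV = Pts[1].second - Pts[0].second;
  if (DV % DI != 0)
    return std::nullopt;
  int64_t Step = DV / DI;
  int64_t Addend = Pts[0].second - Step * Pts[0].first;

  // Every remaining kept element must agree with the derived sequence.
  for (auto &P : Pts)
    if (P.second != Addend + Step * P.first)
      return std::nullopt;
  return Seq{Step, Addend, IgnoredIdx};
}

// Mirrors the new isSimpleVIDSequence wrapper: try a full match first, then
// retry with each single index ignored in turn.
static std::optional<Seq> matchAlmostVID(const std::vector<int64_t> &Elts) {
  if (auto S = matchVID(Elts, -1))
    return S;
  for (size_t I = 0; I < Elts.size(); ++I)
    if (auto S = matchVID(Elts, int(I)))
      return S;
  return std::nullopt;
}

int main() {
  // {0, 1, 3, 3} is VID except for element 2, so it matches with
  // InconsistentIdx == 2; the backend would then emit vid.v plus one
  // element insert instead of loading the whole constant from memory.
  if (auto S = matchAlmostVID({0, 1, 3, 3}))
    std::printf("step=%lld addend=%lld inconsistent=%d\n",
                (long long)S->Step, (long long)S->Addend, S->InconsistentIdx);
}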