diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1749,7 +1749,8 @@
 // Note that this method will also match potentially unappealing index
 // sequences, like , however it is left to the caller to
 // determine whether this is worth generating code for.
-static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+static Optional<VIDSequence>
+isSimpleVIDSequence(SDValue Op, bool IgnoreFirstElement = false) {
   unsigned NumElts = Op.getNumOperands();
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
   if (!Op.getValueType().isInteger())
@@ -1759,7 +1760,7 @@
   Optional<int64_t> SeqStepNum, SeqAddend;
   Optional<std::pair<uint64_t, unsigned>> PrevElt;
   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
-  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
+  for (unsigned Idx = IgnoreFirstElement ? 1 : 0; Idx < NumElts; Idx++) {
     // Assume undef elements match the sequence; we just have to be careful
     // when interpolating across them.
     if (Op.getOperand(Idx).isUndef())
@@ -1956,7 +1957,15 @@
   // Try and match index sequences, which we can lower to the vid instruction
   // with optional modifications. An all-undef vector is matched by
   // getSplatValue, above.
-  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
+  auto SimpleVID = isSimpleVIDSequence(Op);
+  // The first element can be set separately with vmv.s.x, so if the full
+  // sequence does not match, retry while ignoring the first element.
+  bool InconsistentFirst = false;
+  if (!SimpleVID) {
+    SimpleVID = isSimpleVIDSequence(Op, true);
+    InconsistentFirst = true;
+  }
+  if (SimpleVID) {
     int64_t StepNumerator = SimpleVID->StepNumerator;
     unsigned StepDenominator = SimpleVID->StepDenominator;
     int64_t Addend = SimpleVID->Addend;
@@ -2002,6 +2011,13 @@
           DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
       VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
     }
+    if (InconsistentFirst) {
+      auto FirstElement = Op.getOperand(0);
+      VID = convertToScalableVector(ContainerVT, VID, DAG, Subtarget);
+      VID = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, VID,
+                        FirstElement, VL);
+      VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
+    }
     return VID;
   }
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -133,10 +133,13 @@
 define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; RV32-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT:    vle16.v v14, (a0)
+; RV32-NEXT:    vid.v v12
+; RV32-NEXT:    vadd.vv v12, v12, v12
+; RV32-NEXT:    vrsub.vi v14, v12, 4
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; RV32-NEXT:    vmv.s.x v14, a0
 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32-NEXT:    vrgatherei16.vv v12, v8, v14
 ; RV32-NEXT:    li a0, 8
@@ -149,10 +152,14 @@
 ;
 ; RV64-LABEL: vrgather_shuffle_vv_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT:    vle64.v v14, (a0)
+; RV64-NEXT:    vid.v v12
+; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vrsub.vi v14, v12, 4
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; RV64-NEXT:    vmv.s.x v14, a0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64-NEXT:    vrgather.vv v12, v8, v14
 ; RV64-NEXT:    li a0, 8
 ; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -663,3 +663,49 @@
   store <8 x i16> , <8 x i16>* %x
   ret void
 }
+
+define void @buildvec_vid_vmv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: buildvec_vid_vmv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a1, 77
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i16> , <8 x i16>* %x
+  ret void
+}
+
+define void @buildvec_vid_mpy_vmv_imm_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: buildvec_vid_mpy_vmv_imm_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a1, 17
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    li a1, 77
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i16> , <8 x i16>* %x
+  ret void
+}
+
+define void @buildvec_vid_shl_vmv_imm_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: buildvec_vid_shl_vmv_imm_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 9
+; CHECK-NEXT:    li a1, 77
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i16> , <8 x i16>* %x
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -85,10 +85,14 @@
 define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vadd.vv v10, v10, v10
+; CHECK-NEXT:    vrsub.vi v11, v10, 4
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmv.s.x v11, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 8
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -4189,16 +4189,19 @@
 ; LMULMAX1-LABEL: mulhu_v16i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; LMULMAX1-NEXT:    vle16.v v8, (a0)
 ; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle16.v v8, (a1)
-; LMULMAX1-NEXT:    lui a2, %hi(.LCPI130_0)
-; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI130_0)
-; LMULMAX1-NEXT:    vle16.v v9, (a2)
-; LMULMAX1-NEXT:    vle16.v v10, (a0)
-; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    vle16.v v9, (a1)
+; LMULMAX1-NEXT:    vid.v v10
+; LMULMAX1-NEXT:    vadd.vi v10, v10, 8
+; LMULMAX1-NEXT:    li a2, 7
+; LMULMAX1-NEXT:    vsetvli zero, zero, e16, m1, tu, mu
+; LMULMAX1-NEXT:    vmv.s.x v10, a2
+; LMULMAX1-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
+; LMULMAX1-NEXT:    vdivu.vv v8, v8, v10
+; LMULMAX1-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-NEXT:    vse16.v v9, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <16 x i16>, <16 x i16>* %x
   %b = udiv <16 x i16> %a, 
@@ -4335,13 +4338,17 @@
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vle64.v v10, (a1)
 ; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI132_1)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI132_1)
-; LMULMAX2-RV64-NEXT:    vle64.v v14, (a1)
 ; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
 ; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v14
+; LMULMAX2-RV64-NEXT:    vid.v v10
+; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v10, 1
+; LMULMAX2-RV64-NEXT:    vadd.vi v10, v10, 2
+; LMULMAX2-RV64-NEXT:    li a1, 1
+; LMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
+; LMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
 ; LMULMAX2-RV64-NEXT:    ret
 ;