diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2184,7 +2184,8 @@
     // a single addi instruction.
     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
-        isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
+        isPowerOf2_32(StepDenominator) &&
+        (StepNumerator > 0 || StepDenominator == 1) && isInt<5>(Addend)) {
       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
       // Convert right out of the scalable type so we can use standard ISD
       // nodes for the rest of the computation. If we used scalable types with
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -634,13 +634,17 @@
   ret void
 }
 
+; We match this as a (-1 / 4) - 5 sequence but don't want to introduce a vdiv
+; to divide the negative step.
 define void @buildvec_vid_stepn1o4_addn5_v8i8(<8 x i8>* %z0) {
 ; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 15
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vsrl.vi v8, v8, 2
-; CHECK-NEXT:    vrsub.vi v8, v8, -5
+; CHECK-NEXT:    vmv.v.i v8, -6
+; CHECK-NEXT:    vmerge.vim v8, v8, -5, v0
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <8 x i8> <i8 -5, i8 -5, i8 -5, i8 -5, i8 -6, i8 -6, i8 -6, i8 -6>, <8 x i8>* %z0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1205,23 +1205,25 @@
 ; RV32-NEXT:    vmv.s.x v9, a1
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; RV32-NEXT:    vmulh.vv v9, v8, v9
+; RV32-NEXT:    li a1, 3
+; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vsrl.vi v10, v10, 1
-; RV32-NEXT:    vrsub.vi v10, v10, 0
+; RV32-NEXT:    vmv.v.i v10, -1
+; RV32-NEXT:    vmerge.vim v10, v10, 0, v0
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; RV32-NEXT:    vmadd.vv v10, v8, v9
+; RV32-NEXT:    li a1, 63
+; RV32-NEXT:    vsrl.vx v8, v10, a1
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    vmv.s.x v9, a1
+; RV32-NEXT:    vmv.v.i v11, 0
 ; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, mu
-; RV32-NEXT:    vslideup.vi v9, v8, 2
+; RV32-NEXT:    vslideup.vi v11, v9, 2
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT:    vsra.vv v8, v10, v9
-; RV32-NEXT:    li a1, 63
-; RV32-NEXT:    vsrl.vx v9, v10, a1
-; RV32-NEXT:    vadd.vv v8, v8, v9
+; RV32-NEXT:    vsra.vv v9, v10, v11
+; RV32-NEXT:    vadd.vv v8, v9, v8
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
 ;