diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2286,6 +2286,35 @@
   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
 }
 
+// Is this mask a slidedown that shifts in undefs?
+static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
+  int Size = Mask.size();
+
+  // Elements shifted in should be undef.
+  auto CheckUndefs = [&](int Shift) {
+    for (int i = Size - Shift; i != Size; ++i)
+      if (Mask[i] >= 0)
+        return false;
+    return true;
+  };
+
+  // Elements should be shifted or undef.
+  auto MatchShift = [&](int Shift) {
+    for (int i = 0; i != Size - Shift; ++i)
+      if (Mask[i] >= 0 && Mask[i] != Shift + i)
+        return false;
+    return true;
+  };
+
+  // Try all possible shifts.
+  for (int Shift = 1; Shift != Size; ++Shift)
+    if (CheckUndefs(Shift) && MatchShift(Shift))
+      return Shift;
+
+  // No match.
+  return -1;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
@@ -2371,6 +2400,20 @@
     }
   }
 
+  // Try to match as a slidedown.
+  int SlideAmt = matchShuffleAsSlideDown(SVN->getMask());
+  if (SlideAmt >= 0) {
+    // TODO: Should we reduce the VL to account for the upper undef elements?
+    // Requires additional vsetvlis, but might be faster to execute.
+    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+    SDValue SlideDown =
+        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                    DAG.getUNDEF(ContainerVT), V1,
+                    DAG.getConstant(SlideAmt, DL, XLenVT),
+                    TrueMask, VL);
+    return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+  }
+
   // Detect shuffles which can be re-expressed as vector selects; these are
   // shuffles in which each element in the destination is taken from an element
   // at the corresponding index in either source vectors.
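
For reference (not part of the patch): a standalone C++ sketch of the rule matchShuffleAsSlideDown above implements, using std::vector<int> in place of llvm::ArrayRef<int> and -1 for undef lanes. The helper name matchSlideDown and the sample masks are made up for illustration. A mask matches shift N only if every defined lane i reads element i + N and the top N lanes are undef, which is exactly what a single vslidedown by N produces.

    // Illustrative sketch only; mirrors the matching logic in the hunk above.
    #include <cstdio>
    #include <vector>

    // Returns the slide amount if Mask is "shift every lane down by N and fill
    // the top N lanes with undef (-1)", otherwise -1.
    static int matchSlideDown(const std::vector<int> &Mask) {
      int Size = static_cast<int>(Mask.size());
      for (int Shift = 1; Shift != Size; ++Shift) {
        bool Match = true;
        // The top Shift lanes must be undef.
        for (int i = Size - Shift; i != Size && Match; ++i)
          Match = Mask[i] < 0;
        // Every remaining lane must read element i + Shift, or be undef.
        for (int i = 0; i != Size - Shift && Match; ++i)
          Match = Mask[i] < 0 || Mask[i] == Shift + i;
        if (Match)
          return Shift;
      }
      return -1;
    }

    int main() {
      std::printf("%d\n", matchSlideDown({1, 2, 3, -1}));                // 1  -> vslidedown by 1
      std::printf("%d\n", matchSlideDown({3, 4, -1, 6, 7, -1, -1, -1})); // 3  -> vslidedown by 3
      std::printf("%d\n", matchSlideDown({1, 2, 0, -1}));                // -1 -> not a slidedown
    }
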
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -235,3 +235,23 @@
   %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32>
   ret <4 x double> %s
 }
+
+define <4 x half> @slidedown_v4f16(<4 x half> %x) {
+; CHECK-LABEL: slidedown_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: ret
+  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32>
+  ret <4 x half> %s
+}
+
+define <8 x float> @slidedown_v8f32(<8 x float> %x) {
+; CHECK-LABEL: slidedown_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: ret
+  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32>
+  ret <8 x float> %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -541,3 +541,23 @@
   %shuf = shufflevector <4 x i8> %v, <4 x i8> undef, <8 x i32>
   ret <8 x i8> %shuf
 }
+
+define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
+; CHECK-LABEL: slidedown_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: ret
+  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32>
+  ret <4 x i16> %s
+}
+
+define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
+; CHECK-LABEL: slidedown_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: ret
+  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32>
+  ret <8 x i32> %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -5584,10 +5584,8 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: vrgather.vi v9, v8, 1
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -5604,13 +5602,10 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 4
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: vrgather.vi v9, v8, 1
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -5627,16 +5622,12 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 8
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v10, v9, 4
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; 
CHECK-NEXT: vmul.vv v8, v8, v11 -; CHECK-NEXT: vadd.vi v9, v9, 2 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v9, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5654,21 +5645,14 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI253_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI253_0) -; CHECK-NEXT: vle8.v v10, (a0) -; CHECK-NEXT: vrgather.vv v12, v8, v10 -; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v12, v10, 8 -; CHECK-NEXT: vrgather.vv v14, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v14 -; CHECK-NEXT: vadd.vi v12, v10, 4 -; CHECK-NEXT: vrgather.vv v14, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v14 -; CHECK-NEXT: vadd.vi v10, v10, 2 -; CHECK-NEXT: vrgather.vv v12, v8, v10 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v10, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5686,26 +5670,17 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI254_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI254_0) -; CHECK-NEXT: vle8.v v12, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI254_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI254_1) -; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vrgather.vv v20, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vslidedown.vx v12, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v12 -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vadd.vi v16, v12, 8 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v16, v12, 4 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v12, v12, 2 -; CHECK-NEXT: vrgather.vv v16, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vrgather.vi v12, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5723,31 +5698,20 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI255_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_0) -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: lui a0, %hi(.LCPI255_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_1) -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI255_2) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_2) -; CHECK-NEXT: vle8.v v0, (a0) +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv 
v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v0 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5767,32 +5731,21 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI256_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_0) -; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: lui a0, %hi(.LCPI256_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_1) -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI256_2) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_2) -; CHECK-NEXT: vle8.v v0, (a0) +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v0 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5839,10 +5792,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vi v9, v9, 2 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5859,13 +5810,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vi v10, v9, 4 -; CHECK-NEXT: vrgather.vv v11, v8, v10 -; CHECK-NEXT: vmul.vv v8, v8, v11 -; CHECK-NEXT: vadd.vi v9, v9, 2 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5882,16 +5830,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) 
-; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v12, v10, 8 -; CHECK-NEXT: vrgather.vv v14, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v14 -; CHECK-NEXT: vadd.vi v12, v10, 4 -; CHECK-NEXT: vrgather.vv v14, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v14 -; CHECK-NEXT: vadd.vi v10, v10, 2 -; CHECK-NEXT: vrgather.vv v12, v8, v10 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5909,21 +5853,14 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI262_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI262_0) -; CHECK-NEXT: vle16.v v12, (a0) -; CHECK-NEXT: vrgather.vv v16, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vadd.vi v16, v12, 8 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v16, v12, 4 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v12, v12, 2 -; CHECK-NEXT: vrgather.vv v16, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v12, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: vrgather.vi v12, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5941,26 +5878,17 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI263_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI263_0) -; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI263_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI263_1) -; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vrgather.vv v0, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -5980,27 +5908,18 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI264_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI264_0) -; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI264_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI264_1) -; CHECK-NEXT: vle16.v v0, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vslidedown.vx 
v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v0 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6047,10 +5966,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vi v9, v9, 2 -; CHECK-NEXT: vrgather.vv v10, v8, v9 -; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6067,13 +5984,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v12, v10, 4 -; CHECK-NEXT: vrgather.vv v14, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v14 -; CHECK-NEXT: vadd.vi v10, v10, 2 -; CHECK-NEXT: vrgather.vv v12, v8, v10 -; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6090,16 +6004,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vadd.vi v16, v12, 8 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v16, v12, 4 -; CHECK-NEXT: vrgather.vv v20, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v20 -; CHECK-NEXT: vadd.vi v12, v12, 2 -; CHECK-NEXT: vrgather.vv v16, v8, v12 -; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vslidedown.vi v12, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: vrgather.vi v12, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6117,21 +6027,14 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI270_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI270_0) -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: 
vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 +; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6151,22 +6054,15 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI271_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI271_0) -; CHECK-NEXT: vle32.v v24, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 8 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vslidedown.vi v16, v8, 2 ; CHECK-NEXT: vmul.vv v8, v8, v16 -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vadd.vi v24, v16, 8 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v24, v16, 4 -; CHECK-NEXT: vrgather.vv v0, v8, v24 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vadd.vi v16, v16, 2 -; CHECK-NEXT: vrgather.vv v24, v8, v16 -; CHECK-NEXT: vmul.vv v8, v8, v24 ; CHECK-NEXT: vrgather.vi v16, v8, 1 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -6237,12 +6133,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vadd.vi v10, v10, 2 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vrgatherei16.vv v12, v8, v10 -; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: vrgather.vi v10, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 @@ -6256,10 +6148,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vadd.vi v10, v10, 2 -; RV64-NEXT: vrgather.vv v12, v8, v10 -; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vmul.vv v8, v8, v10 ; RV64-NEXT: vrgather.vi v10, v8, 1 ; RV64-NEXT: vmul.vv v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 @@ -6276,17 +6166,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vadd.vi v13, v12, 4 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v8, v13 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vadd.vi v12, v12, 2 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vslidedown.vi v12, v8, 2 +; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: vrgather.vi v12, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 @@ -6300,13 +6183,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vadd.vi v16, v12, 4 -; RV64-NEXT: vrgather.vv v20, v8, v16 -; RV64-NEXT: vmul.vv v8, v8, v20 -; RV64-NEXT: vadd.vi v12, v12, 2 -; RV64-NEXT: vrgather.vv v16, v8, v12 -; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vslidedown.vi v12, v8, 2 +; RV64-NEXT: vmul.vv v8, v8, v12 ; RV64-NEXT: vrgather.vi v12, v8, 1 ; RV64-NEXT: 
vmul.vv v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 @@ -6323,22 +6203,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vadd.vi v18, v16, 8 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v18, v16, 4 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v16, v16, 2 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 @@ -6352,16 +6222,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vadd.vi v24, v16, 8 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v24, v16, 4 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v16, v16, 2 -; RV64-NEXT: vrgather.vv v24, v8, v16 -; RV64-NEXT: vmul.vv v8, v8, v24 +; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 4 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 2 +; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vrgather.vi v16, v8, 1 ; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 @@ -6381,22 +6247,12 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vadd.vi v18, v16, 8 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v18, v16, 4 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v16, v16, 2 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu @@ -6413,16 +6269,12 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vadd.vi v24, v16, 8 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v24, v16, 4 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v16, v16, 2 -; RV64-NEXT: vrgather.vv v24, v8, v16 -; RV64-NEXT: vmul.vv v8, v8, v24 +; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vmul.vv v8, v8, 
v16 +; RV64-NEXT: vslidedown.vi v16, v8, 4 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 2 +; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vrgather.vi v16, v8, 1 ; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 @@ -6448,22 +6300,12 @@ ; RV32-NEXT: vmul.vv v16, v24, v16 ; RV32-NEXT: vmul.vv v8, v8, v0 ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vid.v v16 -; RV32-NEXT: vadd.vi v18, v16, 8 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v18, v16, 4 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v18 -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vadd.vi v16, v16, 2 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vslidedown.vi v16, v8, 8 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu @@ -6486,16 +6328,12 @@ ; RV64-NEXT: vmul.vv v16, v24, v16 ; RV64-NEXT: vmul.vv v8, v8, v0 ; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vadd.vi v24, v16, 8 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v24, v16, 4 -; RV64-NEXT: vrgather.vv v0, v8, v24 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: vadd.vi v16, v16, 2 -; RV64-NEXT: vrgather.vv v24, v8, v16 -; RV64-NEXT: vmul.vv v8, v8, v24 +; RV64-NEXT: vslidedown.vi v16, v8, 8 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 4 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 2 +; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vrgather.vi v16, v8, 1 ; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8
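
The fixed-vectors-reduction-int.ll updates above all follow one shape: each step that previously gathered element i + N through an index vector (vid.v/vadd.vi, or a constant-pool load for larger shifts) now uses a single vslidedown by N, halving the active prefix until lane 0 holds the product; only the final vrgather.vi of element 1 is unchanged. A scalar sketch of that halving strategy, illustrative only and not part of the patch, assuming a power-of-two element count:

    #include <cstdio>
    #include <vector>

    // Multiply-reduce by repeatedly folding the upper half onto the lower half,
    // mirroring the vslidedown.vi/vx + vmul.vv pairs in the checks above.
    static int reduceMul(std::vector<int> V) {
      for (size_t Half = V.size() / 2; Half != 0; Half /= 2)
        for (size_t i = 0; i != Half; ++i)
          V[i] *= V[i + Half]; // lane i reads lane i + Half, i.e. a slide down by Half
      return V[0];
    }

    int main() {
      std::printf("%d\n", reduceMul({1, 2, 3, 4, 5, 6, 7, 8})); // prints 40320 (8!)
    }
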