diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11197,12 +11197,22 @@
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }
 
+bool isVMV_V_X_VLOfConstant(SDValue N, APInt &SplatVal) {
+  if (N.getOpcode() == RISCVISD::VMV_V_X_VL &&
+      isa<ConstantSDNode>(N->getOperand(1))) {
+    SplatVal = N->getConstantOperandAPInt(1);
+    return true;
+  }
+  return false;
+}
+
 // According to the property that indexed load/store instructions
 // zero-extend their indices, \p narrowIndex tries to narrow the type of the
 // index operand if it matches the pattern (shl (zext x to ty), C) and
 // bits(x) + C < bits(ty).
 static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
-  if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
+  if ((N.getOpcode() != ISD::SHL && N.getOpcode() != RISCVISD::SHL_VL) ||
+      !N->hasOneUse())
     return SDValue();
 
   SDValue N0 = N.getOperand(0);
@@ -11214,7 +11224,21 @@
 
   APInt ShAmt;
   SDValue N1 = N.getOperand(1);
-  if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
+  // If this is an insert of an extracted vector into an undef vector, we can
+  // just use the input to the extract.
+  if (N1.getOpcode() == ISD::INSERT_SUBVECTOR) {
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+    EVT VT = N->getValueType(0);
+    if (N10.isUndef() && N11.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N11.getOperand(1) == N1.getOperand(2) &&
+        N11.getOperand(0).getValueType() == VT) {
+      N1 = N11.getOperand(0);
+    }
+  }
+
+  if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt) &&
+      !isVMV_V_X_VLOfConstant(N1, ShAmt))
     return SDValue();
 
   SDLoc DL(N);
@@ -11233,6 +11257,14 @@
   EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
   SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
 
+  if (N.getOpcode() == RISCVISD::SHL_VL) {
+    SDValue NewShAmtVec =
+        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NewVT, DAG.getUNDEF(NewVT),
+                    N1.getOperand(1), N1.getOperand(2));
+    return DAG.getNode(RISCVISD::SHL_VL, DL, NewVT, NewExt, NewShAmtVec,
+                       DAG.getUNDEF(NewVT), N.getOperand(3), N.getOperand(4));
+  }
+
   SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
   return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1710,21 +1710,19 @@
 define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v8
-; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v8, v8
 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
 ; RV32-NEXT: vmv.v.v v8, v9
 ; RV32-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
 ; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf8 v12, v8
-; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v8, v8
 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vluxei16.v v9,
(a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; @@ -2780,20 +2778,21 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 2 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -3245,11 +3244,10 @@ ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf4 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vzext.vf2 v12, v8 +; RV64V-NEXT: vsll.vi v8, v12, 2 +; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -4760,20 +4758,21 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v9, v8 +; RV32V-NEXT: vsll.vi v8, v9, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf8 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -5616,10 +5615,11 @@ ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf4 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v10, v8 +; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -7645,21 +7645,19 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v8, v8 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; @@ -8589,20 +8587,21 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 2 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -9054,11 +9053,10 @@ ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf4 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vzext.vf2 v12, v8 +; RV64V-NEXT: vsll.vi v8, v12, 2 +; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -10324,20 +10322,21 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v9, v8 +; RV32V-NEXT: vsll.vi v8, v9, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf8 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; 
RV64V-NEXT: ret ; @@ -10997,10 +10996,11 @@ ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf4 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v10, v8 +; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1303,20 +1303,18 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16: @@ -2202,19 +2200,20 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: @@ -2612,11 +2611,10 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: @@ -3943,19 +3941,20 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, 
<8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v13, v12 +; RV32V-NEXT: vsll.vi v12, v13, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: @@ -4702,10 +4701,11 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: @@ -6554,20 +6554,18 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16: @@ -7399,19 +7397,20 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; 
RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: @@ -7809,11 +7808,10 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: @@ -8957,19 +8955,20 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v14, v12 -; RV32V-NEXT: vsll.vi v12, v14, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v13, v12 +; RV32V-NEXT: vsll.vi v12, v13, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32V-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: @@ -9584,10 +9583,11 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret ; ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -531,20 +531,18 @@ define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v9, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v9, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = 
getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs @@ -742,20 +740,20 @@ define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v10, v9, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v10, v9, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -822,11 +820,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v8, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -982,20 +980,20 @@ define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v12, v9, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v12, v9, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1062,11 +1060,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1294,20 +1292,18 @@ define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr 
%base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v9, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v9, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs @@ -1463,20 +1459,20 @@ define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v10, v9, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, v8 +; RV64-NEXT: vsll.vi v10, v9, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1543,11 +1539,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v8, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1703,20 +1699,20 @@ define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vsll.vi v12, v10, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v12, v9, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v9, 
v8 +; RV64-NEXT: vsll.vi v12, v9, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1783,11 +1779,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v8 -; RV64-NEXT: vsll.vi v8, v12, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -2081,22 +2077,21 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vslidedown.vi v10, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v24, v12, 3 +; RV64-NEXT: vzext.vf2 v10, v8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsll.vi v16, v10, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB89_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 @@ -2104,7 +2099,7 @@ ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2259,22 +2254,21 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vslidedown.vi v12, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v16, v12 +; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vzext.vf2 v12, v8 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsll.vi v16, v12, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB92_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB92_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 @@ -2282,7 +2276,7 @@ ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i16> %idxs 
to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -376,20 +376,18 @@ define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs @@ -562,20 +560,20 @@ define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -642,11 +640,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -798,20 +796,20 @@ define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 
v13, v12 +; RV32-NEXT: vsll.vi v12, v13, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -878,11 +876,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs @@ -1102,20 +1100,18 @@ define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v9 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v9, v9 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v9 -; RV64-NEXT: vadd.vv v12, v12, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vwaddu.vv v10, v9, v9 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs @@ -1267,20 +1263,20 @@ define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vsll.vi v10, v12, 2 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v11, v10 +; RV32-NEXT: vsll.vi v10, v11, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v11, v10 +; RV64-NEXT: vsll.vi v10, v11, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr 
inbounds float, ptr %base, <8 x i32> %eidxs @@ -1347,11 +1343,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 -; RV64-NEXT: vsll.vi v12, v12, 2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v12, v10 +; RV64-NEXT: vsll.vi v10, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -1503,20 +1499,20 @@ define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v14, v12 -; RV32-NEXT: vsll.vi v12, v14, 3 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v13, v12 +; RV32-NEXT: vsll.vi v12, v13, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vzext.vf2 v13, v12 +; RV64-NEXT: vsll.vi v12, v13, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs @@ -1583,11 +1579,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v12, v16, 3 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs