diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1150,7 +1150,8 @@ // Jumps are expensive, compared to logic setJumpIsExpensive(); - setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, + setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -10623,6 +10624,46 @@ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); } +// According to the property that indexed load/store instructions zero-extend +// their indices, \p narrowIndex tries to narrow the type of the index operand +// if it matches the pattern (shl (zext x to ty), C) and +// bits(x) + C < bits(ty). +static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { + if (N.getOpcode() != ISD::SHL || !N->hasOneUse()) + return SDValue(); + + SDValue N0 = N.getOperand(0); + if (N0.getOpcode() != ISD::ZERO_EXTEND && + N0.getOpcode() != RISCVISD::VZEXT_VL) + return SDValue(); + if (!N0->hasOneUse()) + return SDValue(); + + APInt ShAmt; + SDValue N1 = N.getOperand(1); + if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) + return SDValue(); + + SDLoc DL(N); + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned SrcElen = SrcVT.getScalarSizeInBits(); + unsigned ShAmtV = ShAmt.getZExtValue(); + unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); + NewElen = std::max(NewElen, 8U); + + // Skip if NewElen is not narrower than the original extended type. + if (NewElen >= N0.getValueType().getScalarSizeInBits()) + return SDValue(); + + EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); + EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); + + SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); + SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); + return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); +} + // Replace (seteq (i64 (and X, 0xffffffff)), C1) with // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg @@ -12920,8 +12961,11 @@ } break; } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = N->getConstantOperandVal(0); + unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IntNo = N->getConstantOperandVal(IntOpNo); switch (IntNo) { // By default we do not combine any intrinsic.
default: @@ -12944,6 +12988,23 @@ return DAG.getConstant(-1, DL, VT); return DAG.getConstant(0, DL, VT); } + case Intrinsic::riscv_vloxei: + case Intrinsic::riscv_vloxei_mask: + case Intrinsic::riscv_vluxei: + case Intrinsic::riscv_vluxei_mask: + case Intrinsic::riscv_vsoxei: + case Intrinsic::riscv_vsoxei_mask: + case Intrinsic::riscv_vsuxei: + case Intrinsic::riscv_vsuxei_mask: + if (SDValue V = narrowIndex(N->getOperand(4), DAG)) { + SmallVector<SDValue> Ops(N->ops()); + Ops[4] = V; + const auto *MemSD = cast<MemIntrinsicSDNode>(N); + return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), + Ops, MemSD->getMemoryVT(), + MemSD->getMemOperand()); + } + return SDValue(); } } case ISD::BITCAST: { diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -521,25 +521,14 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i16, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8i16.nxv8p0( %ptrs, i32 2, %m, %passthru) @@ -776,24 +765,15 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8i32.nxv8p0( %ptrs, i32 4, %m, %passthru) @@ -850,24 +830,14 @@ } define @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -;
RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vsll.vi v8, v16, 2 +; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8i32.nxv8p0( %ptrs, i32 4, %m, %passthru) @@ -1055,24 +1025,15 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8i64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -1129,24 +1090,15 @@ } define @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vsll.vi v8, v12, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8i64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -1480,25 +1432,14 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; 
RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds half, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8f16.nxv8p0( %ptrs, i32 2, %m, %passthru) @@ -1691,24 +1632,15 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vzext.vf4 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8f32.nxv8p0( %ptrs, i32 4, %m, %passthru) @@ -1765,24 +1697,14 @@ } define @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; RV32-NEXT: vzext.vf2 v16, v8 -; RV32-NEXT: vsll.vi v8, v16, 2 -; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: vmv.v.v v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vsll.vi v8, v16, 2 +; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8f32.nxv8p0( %ptrs, i32 4, %m, %passthru) @@ -1970,24 +1892,15 @@ } define @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli 
a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8f64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -2044,24 +1957,15 @@ } define @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, %idxs, %m, %passthru) { -; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v8, v24, 3 -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vsll.vi v8, v12, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs %v = call @llvm.masked.gather.nxv8f64.nxv8p0( %ptrs, i32 8, %m, %passthru) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -380,23 +380,13 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8i16( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v10 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i16, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8i16.nxv8p0( %val, %ptrs, i32 2, 
%m) @@ -587,22 +577,14 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vsll.vi v12, v14, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8i32.nxv8p0( %val, %ptrs, i32 4, %m) @@ -655,22 +637,13 @@ } define void @mscatter_baseidx_zext_nxv8i16_nxv8i32( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v12 +; CHECK-NEXT: vsll.vi v12, v16, 2 +; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8i32.nxv8p0( %val, %ptrs, i32 4, %m) @@ -842,22 +815,14 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v18, v16 +; CHECK-NEXT: vsll.vi v16, v18, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -910,22 +875,14 @@ } define void @mscatter_baseidx_zext_nxv8i16_nxv8i64( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v20, v16 -; 
RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vsll.vi v16, v20, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1164,23 +1121,13 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8f16( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vadd.vv v12, v12, v12 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v10 -; RV64-NEXT: vadd.vv v16, v16, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds half, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8f16.nxv8p0( %val, %ptrs, i32 2, %m) @@ -1352,22 +1299,14 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vsll.vi v12, v14, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8f32.nxv8p0( %val, %ptrs, i32 4, %m) @@ -1420,22 +1359,13 @@ } define void @mscatter_baseidx_zext_nxv8i16_nxv8f32( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: -; RV64: # %bb.0: -; RV64-NEXT: 
vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v12 +; CHECK-NEXT: vsll.vi v12, v16, 2 +; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8f32.nxv8p0( %val, %ptrs, i32 4, %m) @@ -1607,22 +1537,14 @@ } define void @mscatter_baseidx_zext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v18, v16 +; CHECK-NEXT: vsll.vi v16, v18, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1675,22 +1597,14 @@ } define void @mscatter_baseidx_zext_nxv8i16_nxv8f64( %val, ptr %base, %idxs, %m) { -; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf2 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vsll.vi v16, v20, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; CHECK-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll @@ -0,0 +1,382 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.riscv.vloxei.nxv4i32.nxv4i64( + , + *, + , + i64); + +define @test_vloxei(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: 
vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +define @test_vloxei2(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 14 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei32.v v8, (a0), v8 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 14, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +define @test_vloxei3(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vzext.vf8 v12, v8 +; CHECK-NEXT: vsll.vi v12, v12, 26 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei64.v v8, (a0), v12 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 26, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +; Test using vp.zext to extend. +declare @llvm.vp.zext.nxvi64.nxv1i8(, , i32) +define @test_vloxei4(* %ptr, %offset, %m, i32 zeroext %vl) { +; CHECK-LABEL: test_vloxei4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8, v0.t +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = call @llvm.vp.zext.nxvi64.nxv1i8( %offset, %m, i32 %vl) + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %vl.i64 = zext i32 %vl to i64 + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl.i64) + ret %res +} + +; Test that the original extended type is already narrow enough.
+declare @llvm.riscv.vloxei.nxv4i32.nxv4i16( + , + *, + , + i64); +define @test_vloxei5(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 12 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i16 12, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i16( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +define @test_vloxei6(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +define @test_vloxei7(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vloxei7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsll.vi v10, v8, 2 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei8.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 2, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +declare @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( + , + *, + , + , + i64, + i64); + +define @test_vloxei_mask(* %ptr, %offset, %m, i64 %vl) { +; CHECK-LABEL: test_vloxei_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vloxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + %m, + i64 %vl, i64 1) + ret %res +} + +declare @llvm.riscv.vluxei.nxv4i32.nxv4i64( + , + *, + , + i64); + +define @test_vluxei(* %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vluxei: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vluxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vluxei.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + i64 %vl) + ret %res +} + +declare @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( + , + *, + , 
+ , + i64, + i64); + +define @test_vluxei_mask(* %ptr, %offset, %m, i64 %vl) { +; CHECK-LABEL: test_vluxei_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v9, v8 +; CHECK-NEXT: vsll.vi v10, v9, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vluxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + %res = call @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( + undef, + * %ptr, + %shl, + %m, + i64 %vl, i64 1) + ret %res +} + +declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( + , + *, + , + i64); + +define void @test_vsoxei( %val, * %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vsoxei: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + call void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( + %val, + * %ptr, + %shl, + i64 %vl) + ret void +} + +declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( + , + *, + , + , + i64); + +define void @test_vsoxei_mask( %val, * %ptr, %offset, %m, i64 %vl) { +; CHECK-LABEL: test_vsoxei_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( + %val, + * %ptr, + %shl, + %m, + i64 %vl) + ret void +} + +declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( + , + *, + , + i64); + +define void @test_vsuxei( %val, * %ptr, %offset, i64 %vl) { +; CHECK-LABEL: test_vsuxei: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsuxei16.v v8, (a0), v10 +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + call void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( + %val, + * %ptr, + %shl, + i64 %vl) + ret void +} + +declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( + , + *, + , + , + i64); + +define void @test_vsuxei_mask( %val, * %ptr, %offset, %m, i64 %vl) { +; CHECK-LABEL: test_vsuxei_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vsll.vi v10, v11, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsuxei16.v v8, (a0), v10, v0.t +; CHECK-NEXT: ret +entry: + %offset.ext = zext %offset to + %shamt = insertelement undef, i64 4, i32 0 + %shamt.vec = shufflevector %shamt, poison, zeroinitializer + %shl = shl %offset.ext, %shamt.vec + call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( + %val, + * %ptr, + %shl, + %m, + i64 %vl) + ret void 
+} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -577,20 +577,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i16, ptr %base, %eidxs @@ -807,20 +805,20 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -887,11 +885,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 +; RV64-NEXT: vsll.vi v8, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -1066,20 +1064,20 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v16, v10, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, 
e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v16, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1146,11 +1144,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 +; RV64-NEXT: vsll.vi v16, v12, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1398,20 +1396,18 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 ; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds half, ptr %base, %eidxs @@ -1586,20 +1582,20 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v12, v10, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v12, v10, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1666,11 +1662,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 +; RV64-NEXT: vsll.vi v8, v12, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), 
v8, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1845,20 +1841,20 @@ define @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v16, v10, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v16, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1925,11 +1921,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 +; RV64-NEXT: vsll.vi v16, v12, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2096,20 +2092,20 @@ define @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v16, v12, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsll.vi v16, v10, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsll.vi v16, v10, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2176,11 +2172,11 @@ ; ; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v12, v8 +; RV64-NEXT: vsll.vi v16, v12, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2473,11 +2469,11 @@ ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: vsetvli a2, 
zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v10 -; RV64-NEXT: vzext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v16, v8 +; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vzext.vf2 v16, v10 +; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 ; RV64-NEXT: sltu a4, a1, a3 @@ -2487,14 +2483,14 @@ ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV64-NEXT: bltu a1, a2, .LBB105_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB105_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -376,20 +376,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v10 -; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, v10, v10 ; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i16, ptr %base, %eidxs @@ -580,20 +578,20 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v14, v12 +; RV32-NEXT: vsll.vi v12, v14, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -660,11 +658,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, 
ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v16, v12 +; RV64-NEXT: vsll.vi v12, v16, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i32, ptr %base, %eidxs @@ -834,20 +832,20 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v18, v16 +; RV32-NEXT: vsll.vi v16, v18, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v18, v16 +; RV64-NEXT: vsll.vi v16, v18, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -914,11 +912,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v20, v16 +; RV64-NEXT: vsll.vi v16, v20, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1156,20 +1154,18 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 -; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v10 -; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, v10, v10 ; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds half, ptr %base, %eidxs @@ -1339,20 +1335,20 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v16, v12 -; RV32-NEXT: vsll.vi v12, v16, 2 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v14, v12 +; RV32-NEXT: vsll.vi 
v12, v14, 2 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v14, v12 +; RV64-NEXT: vsll.vi v12, v14, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1419,11 +1415,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 2 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v16, v12 +; RV64-NEXT: vsll.vi v12, v16, 2 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds float, ptr %base, %eidxs @@ -1593,20 +1589,20 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v18, v16 +; RV32-NEXT: vsll.vi v16, v18, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v18, v16 +; RV64-NEXT: vsll.vi v16, v18, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1673,11 +1669,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v20, v16 +; RV64-NEXT: vsll.vi v16, v20, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1843,20 +1839,20 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v20, v16 -; RV32-NEXT: vsll.vi v16, v20, 3 +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v18, v16 +; RV32-NEXT: vsll.vi v16, v18, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: 
vpscatter_baseidx_zext_nxv8i8_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v18, v16 +; RV64-NEXT: vsll.vi v16, v18, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1923,11 +1919,11 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v24, v16 -; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v20, v16 +; RV64-NEXT: vsll.vi v16, v20, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2283,33 +2279,20 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vl4re16.v v24, (a1) +; RV64-NEXT: vl4re16.v v28, (a1) +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV64-NEXT: vzext.vf2 v24, v30 +; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vzext.vf2 v4, v28 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v16, v26 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vzext.vf4 v16, v24 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v28, v4, 3 ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB98_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB98_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vsoxei32.v v8, (a0), v28, v0.t ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 @@ -2318,18 +2301,7 @@ ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vsoxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs