diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -75,6 +75,9 @@
     break;
   case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
     break;
+  case ISD::VP_GATHER:
+    Res = PromoteIntRes_VP_GATHER(cast<VPGatherSDNode>(N));
+    break;
   case ISD::SELECT:
   case ISD::VSELECT:
   case ISD::VP_SELECT:
@@ -805,6 +808,21 @@
   return Res;
 }

+SDValue DAGTypeLegalizer::PromoteIntRes_VP_GATHER(VPGatherSDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDLoc dl(N);
+  SDValue Ops[] = {N->getChain(), N->getBasePtr(), N->getIndex(),
+                   N->getScale(), N->getMask(), N->getVectorLength()};
+  SDValue Res =
+      DAG.getGatherVP(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops,
+                      N->getMemOperand(), N->getIndexType());
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 /// Promote the overflow flag of an overflowing arithmetic node.
 SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
   // Change the return type of the boolean result while obeying
@@ -1644,6 +1662,9 @@
                                                  OpNo); break;
   case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
                                                   OpNo); break;
+  case ISD::VP_SCATTER:
+    Res = PromoteIntOp_VP_SCATTER(cast<VPScatterSDNode>(N), OpNo);
+    break;
   case ISD::VP_TRUNCATE:
   case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
   case ISD::FP16_TO_FP:
@@ -2099,6 +2120,29 @@
                               N->getIndexType(), TruncateStore);
 }

+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SCATTER(VPScatterSDNode *N,
+                                                  unsigned OpNo) {
+  SmallVector<SDValue, 7> NewOps(N->op_begin(), N->op_end());
+
+  if (OpNo == 5) {
+    // The Mask
+    EVT DataVT = N->getValue().getValueType();
+    NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+  } else if (OpNo == 3) {
+    // The Index
+    if (N->isIndexSigned())
+      // Need to sign extend the index since the bits will likely be used.
+      NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+    else
+      NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
+  } else {
+    NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+  }
+
+  return DAG.getScatterVP(DAG.getVTList(MVT::Other), N->getMemoryVT(), SDLoc(N),
+                          NewOps, N->getMemOperand(), N->getIndexType());
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
   SDValue Op = GetPromotedInteger(N->getOperand(0));
   if (N->getOpcode() == ISD::VP_TRUNCATE)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -331,6 +331,7 @@
   SDValue PromoteIntRes_LOAD(LoadSDNode *N);
   SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
   SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
+  SDValue PromoteIntRes_VP_GATHER(VPGatherSDNode *N);
   SDValue PromoteIntRes_Overflow(SDNode *N);
   SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_Select(SDNode *N);
@@ -395,6 +396,7 @@
   SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_VP_SCATTER(VPScatterSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -4,6 +4,26 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

+declare <2 x i7> @llvm.vp.gather.v2i7.v2p0i7(<2 x i7*>, <2 x i1>, i32)
+
+define <2 x i7> @vpgather_v2i7(<2 x i7*> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v2i7:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v2i7:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %v = call <2 x i7> @llvm.vp.gather.v2i7.v2p0i7(<2 x i7*> %ptrs, <2 x i1> %m, i32 %evl)
+  ret <2 x i7> %v
+}
+
 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0i8(<2 x i8*>, <2 x i1>, i32)

 define <2 x i8> @vpgather_v2i8(<2 x i8*> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
@@ -288,10 +308,10 @@
 ; RV64-NEXT:    addi a3, a1, -16
 ; RV64-NEXT:    vmv1r.v v10, v0
 ; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    bltu a1, a3, .LBB13_2
+; RV64-NEXT:    bltu a1, a3, .LBB14_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a2, a3
-; RV64-NEXT:  .LBB13_2:
+; RV64-NEXT:  .LBB14_2:
 ; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
 ; RV64-NEXT:    vslidedown.vi v12, v8, 16
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
@@ -301,10 +321,10 @@
 ; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    bltu a1, a2, .LBB13_4
+; RV64-NEXT:    bltu a1, a2, .LBB14_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a1, 16
-; RV64-NEXT:  .LBB13_4:
+; RV64-NEXT:  .LBB14_4:
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV64-NEXT:    vsext.vf8 v16, v8
 ; RV64-NEXT:
vsetvli zero, a1, e8, m1, ta, mu @@ -1927,10 +1947,10 @@ ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 0 -; RV32-NEXT: bltu a0, a2, .LBB86_2 +; RV32-NEXT: bltu a0, a2, .LBB87_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB86_2: +; RV32-NEXT: .LBB87_2: ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu @@ -1938,10 +1958,10 @@ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t ; RV32-NEXT: li a1, 16 -; RV32-NEXT: bltu a0, a1, .LBB86_4 +; RV32-NEXT: bltu a0, a1, .LBB87_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB86_4: +; RV32-NEXT: .LBB87_4: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t @@ -1953,19 +1973,19 @@ ; RV64-NEXT: addi a2, a0, -16 ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: li a1, 0 -; RV64-NEXT: bltu a0, a2, .LBB86_2 +; RV64-NEXT: bltu a0, a2, .LBB87_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB86_2: +; RV64-NEXT: .LBB87_2: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: li a1, 16 -; RV64-NEXT: bltu a0, a1, .LBB86_4 +; RV64-NEXT: bltu a0, a1, .LBB87_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB86_4: +; RV64-NEXT: .LBB87_4: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t @@ -1979,10 +1999,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB87_2 +; RV32-NEXT: bltu a1, a3, .LBB88_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB87_2: +; RV32-NEXT: .LBB88_2: ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vsext.vf4 v16, v8 @@ -1991,10 +2011,10 @@ ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB87_4 +; RV32-NEXT: bltu a1, a3, .LBB88_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB87_4: +; RV32-NEXT: .LBB88_4: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu @@ -2008,10 +2028,10 @@ ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a3, .LBB87_2 +; RV64-NEXT: bltu a1, a3, .LBB88_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB87_2: +; RV64-NEXT: .LBB88_2: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu @@ -2022,10 +2042,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB87_4 +; RV64-NEXT: bltu a1, a2, .LBB88_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB87_4: +; RV64-NEXT: .LBB88_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 @@ -2048,10 +2068,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vsext.vf8 v16, v12 -; RV32-NEXT: bltu a1, a3, .LBB88_2 +; RV32-NEXT: bltu a1, a3, .LBB89_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB88_2: +; RV32-NEXT: .LBB89_2: ; RV32-NEXT: vsext.vf8 v24, v8 ; RV32-NEXT: 
vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2061,10 +2081,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB88_4 +; RV32-NEXT: bltu a1, a2, .LBB89_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB88_4: +; RV32-NEXT: .LBB89_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2083,10 +2103,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf8 v16, v12 -; RV64-NEXT: bltu a1, a3, .LBB88_2 +; RV64-NEXT: bltu a1, a3, .LBB89_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB88_2: +; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 @@ -2095,10 +2115,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB88_4 +; RV64-NEXT: bltu a1, a2, .LBB89_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB88_4: +; RV64-NEXT: .LBB89_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2121,10 +2141,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vzext.vf8 v16, v12 -; RV32-NEXT: bltu a1, a3, .LBB89_2 +; RV32-NEXT: bltu a1, a3, .LBB90_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB89_2: +; RV32-NEXT: .LBB90_2: ; RV32-NEXT: vzext.vf8 v24, v8 ; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2134,10 +2154,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB89_4 +; RV32-NEXT: bltu a1, a2, .LBB90_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB89_4: +; RV32-NEXT: .LBB90_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2156,10 +2176,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vzext.vf8 v16, v12 -; RV64-NEXT: bltu a1, a3, .LBB89_2 +; RV64-NEXT: bltu a1, a3, .LBB90_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB89_2: +; RV64-NEXT: .LBB90_2: ; RV64-NEXT: vzext.vf8 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v10, 2 @@ -2168,10 +2188,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB89_4 +; RV64-NEXT: bltu a1, a2, .LBB90_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB89_4: +; RV64-NEXT: .LBB90_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2189,10 +2209,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB90_2 +; RV32-NEXT: bltu a1, a3, .LBB91_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB90_2: +; RV32-NEXT: .LBB91_2: ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vsext.vf2 v16, v8 @@ -2201,10 +2221,10 @@ ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB90_4 +; 
RV32-NEXT: bltu a1, a3, .LBB91_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB90_4: +; RV32-NEXT: .LBB91_4: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu @@ -2218,10 +2238,10 @@ ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a3, .LBB90_2 +; RV64-NEXT: bltu a1, a3, .LBB91_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB90_2: +; RV64-NEXT: .LBB91_2: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu @@ -2232,10 +2252,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB90_4 +; RV64-NEXT: bltu a1, a2, .LBB91_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB90_4: +; RV64-NEXT: .LBB91_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 @@ -2258,10 +2278,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vsext.vf4 v16, v24 -; RV32-NEXT: bltu a1, a3, .LBB91_2 +; RV32-NEXT: bltu a1, a3, .LBB92_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB91_2: +; RV32-NEXT: .LBB92_2: ; RV32-NEXT: vsext.vf4 v24, v8 ; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2271,10 +2291,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB91_4 +; RV32-NEXT: bltu a1, a2, .LBB92_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB91_4: +; RV32-NEXT: .LBB92_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2293,10 +2313,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf4 v16, v24 -; RV64-NEXT: bltu a1, a3, .LBB91_2 +; RV64-NEXT: bltu a1, a3, .LBB92_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB91_2: +; RV64-NEXT: .LBB92_2: ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 @@ -2305,10 +2325,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB91_4 +; RV64-NEXT: bltu a1, a2, .LBB92_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB91_4: +; RV64-NEXT: .LBB92_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2331,10 +2351,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vzext.vf4 v16, v24 -; RV32-NEXT: bltu a1, a3, .LBB92_2 +; RV32-NEXT: bltu a1, a3, .LBB93_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB92_2: +; RV32-NEXT: .LBB93_2: ; RV32-NEXT: vzext.vf4 v24, v8 ; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2344,10 +2364,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB92_4 +; RV32-NEXT: bltu a1, a2, .LBB93_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB92_4: +; RV32-NEXT: .LBB93_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v24, 
v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2366,10 +2386,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vzext.vf4 v16, v24 -; RV64-NEXT: bltu a1, a3, .LBB92_2 +; RV64-NEXT: bltu a1, a3, .LBB93_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB92_2: +; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v12, 2 @@ -2378,10 +2398,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB92_4 +; RV64-NEXT: bltu a1, a2, .LBB93_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB92_4: +; RV64-NEXT: .LBB93_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2399,10 +2419,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a3, .LBB93_2 +; RV32-NEXT: bltu a1, a3, .LBB94_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB93_2: +; RV32-NEXT: .LBB94_2: ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; RV32-NEXT: vsll.vi v16, v8, 3 @@ -2410,10 +2430,10 @@ ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB93_4 +; RV32-NEXT: bltu a1, a3, .LBB94_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB93_4: +; RV32-NEXT: .LBB94_4: ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu @@ -2427,10 +2447,10 @@ ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v1, v0 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a3, .LBB93_2 +; RV64-NEXT: bltu a1, a3, .LBB94_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB93_2: +; RV64-NEXT: .LBB94_2: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v1, 2 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu @@ -2441,10 +2461,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB93_4 +; RV64-NEXT: bltu a1, a2, .LBB94_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB93_4: +; RV64-NEXT: .LBB94_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 @@ -2467,10 +2487,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vsext.vf2 v16, v24 -; RV32-NEXT: bltu a1, a3, .LBB94_2 +; RV32-NEXT: bltu a1, a3, .LBB95_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB94_2: +; RV32-NEXT: .LBB95_2: ; RV32-NEXT: vsext.vf2 v24, v8 ; RV32-NEXT: vsll.vi v8, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2480,10 +2500,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB94_4 +; RV32-NEXT: bltu a1, a2, .LBB95_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB94_4: +; RV32-NEXT: .LBB95_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2502,10 +2522,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vsext.vf2 v16, v24 -; RV64-NEXT: bltu a1, a3, .LBB94_2 +; RV64-NEXT: bltu a1, a3, .LBB95_2 ; RV64-NEXT: # 
%bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB94_2: +; RV64-NEXT: .LBB95_2: ; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v1, 2 @@ -2514,10 +2534,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB94_4 +; RV64-NEXT: bltu a1, a2, .LBB95_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB94_4: +; RV64-NEXT: .LBB95_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2540,10 +2560,10 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vzext.vf2 v16, v24 -; RV32-NEXT: bltu a1, a3, .LBB95_2 +; RV32-NEXT: bltu a1, a3, .LBB96_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB95_2: +; RV32-NEXT: .LBB96_2: ; RV32-NEXT: vzext.vf2 v24, v8 ; RV32-NEXT: vsll.vi v8, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2553,10 +2573,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB95_4 +; RV32-NEXT: bltu a1, a2, .LBB96_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB95_4: +; RV32-NEXT: .LBB96_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2575,10 +2595,10 @@ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vzext.vf2 v16, v24 -; RV64-NEXT: bltu a1, a3, .LBB95_2 +; RV64-NEXT: bltu a1, a3, .LBB96_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB95_2: +; RV64-NEXT: .LBB96_2: ; RV64-NEXT: vzext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v1, 2 @@ -2587,10 +2607,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB95_4 +; RV64-NEXT: bltu a1, a2, .LBB96_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB95_4: +; RV64-NEXT: .LBB96_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2609,10 +2629,10 @@ ; RV32-NEXT: addi a3, a1, -16 ; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: li a2, 0 -; RV32-NEXT: bltu a1, a3, .LBB96_2 +; RV32-NEXT: bltu a1, a3, .LBB97_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB96_2: +; RV32-NEXT: .LBB97_2: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v16, v16, 3 ; RV32-NEXT: vsetvli zero, a2, e32, m4, ta, mu @@ -2622,10 +2642,10 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB96_4 +; RV32-NEXT: bltu a1, a2, .LBB97_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB96_4: +; RV32-NEXT: .LBB97_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v8, 3 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2640,10 +2660,10 @@ ; RV64-NEXT: addi a3, a1, -16 ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a3, .LBB96_2 +; RV64-NEXT: bltu a1, a3, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB96_2: +; RV64-NEXT: .LBB97_2: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v24, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu @@ -2651,10 
+2671,10 @@ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB96_4 +; RV64-NEXT: bltu a1, a2, .LBB97_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB96_4: +; RV64-NEXT: .LBB97_4: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -4,6 +4,24 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 +declare void @llvm.vp.scatter.v2i7.v2p0i7(<2 x i7>, <2 x i7*>, <2 x i1>, i32) + +define void @vpscatter_v2i7(<2 x i7> %val, <2 x i7*> %ptrs, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_v2i7: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_v2i7: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.v2i7.v2p0i7(<2 x i7> %val, <2 x i7*> %ptrs, <2 x i1> %m, i32 %evl) + ret void +} + declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32) define void @vpscatter_v2i8(<2 x i8> %val, <2 x i8*> %ptrs, <2 x i1> %m, i32 zeroext %evl) { @@ -1727,18 +1745,18 @@ ; RV32-NEXT: vle32.v v24, (a0) ; RV32-NEXT: li a0, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB79_2 +; RV32-NEXT: bltu a1, a0, .LBB80_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB79_2: +; RV32-NEXT: .LBB80_2: ; RV32-NEXT: li a0, 0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t -; RV32-NEXT: bltu a1, a2, .LBB79_4 +; RV32-NEXT: bltu a1, a2, .LBB80_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB79_4: +; RV32-NEXT: .LBB80_4: ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu ; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu @@ -1766,10 +1784,10 @@ ; RV64-NEXT: li a3, 16 ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: bltu a2, a3, .LBB79_2 +; RV64-NEXT: bltu a2, a3, .LBB80_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB79_2: +; RV64-NEXT: .LBB80_2: ; RV64-NEXT: li a3, 0 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -1780,10 +1798,10 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t -; RV64-NEXT: bltu a2, a0, .LBB79_4 +; RV64-NEXT: bltu a2, a0, .LBB80_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a3, a0 -; RV64-NEXT: .LBB79_4: +; RV64-NEXT: .LBB80_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu @@ -1807,19 +1825,19 @@ ; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB80_2 +; RV32-NEXT: bltu a2, a3, .LBB81_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB80_2: +; RV32-NEXT: .LBB81_2: ; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: addi 
a1, a2, -16 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: bltu a2, a1, .LBB80_4 +; RV32-NEXT: bltu a2, a1, .LBB81_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB80_4: +; RV32-NEXT: .LBB81_4: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu @@ -1847,10 +1865,10 @@ ; RV64-NEXT: addi a3, sp, 16 ; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB80_2 +; RV64-NEXT: bltu a2, a1, .LBB81_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a3, 16 -; RV64-NEXT: .LBB80_2: +; RV64-NEXT: .LBB81_2: ; RV64-NEXT: li a1, 0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a4, vlenb @@ -1864,10 +1882,10 @@ ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a3, .LBB80_4 +; RV64-NEXT: bltu a2, a3, .LBB81_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a3 -; RV64-NEXT: .LBB80_4: +; RV64-NEXT: .LBB81_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu @@ -1916,10 +1934,10 @@ ; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsext.vf2 v16, v24 ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB81_2 +; RV32-NEXT: bltu a2, a3, .LBB82_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB81_2: +; RV32-NEXT: .LBB82_2: ; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsext.vf2 v24, v8 ; RV32-NEXT: vsll.vi v8, v16, 3 @@ -1930,10 +1948,10 @@ ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: bltu a2, a1, .LBB81_4 +; RV32-NEXT: bltu a2, a1, .LBB82_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB81_4: +; RV32-NEXT: .LBB82_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu @@ -1982,10 +2000,10 @@ ; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsext.vf2 v8, v24 ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: bltu a2, a3, .LBB81_2 +; RV64-NEXT: bltu a2, a3, .LBB82_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB81_2: +; RV64-NEXT: .LBB82_2: ; RV64-NEXT: li a3, 0 ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload @@ -1999,10 +2017,10 @@ ; RV64-NEXT: addi a4, a4, 16 ; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t -; RV64-NEXT: bltu a2, a1, .LBB81_4 +; RV64-NEXT: bltu a2, a1, .LBB82_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB81_4: +; RV64-NEXT: .LBB82_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu @@ -2050,10 +2068,10 @@ ; RV32-NEXT: li a3, 16 ; RV32-NEXT: vzext.vf2 v16, v24 ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: bltu a2, a3, .LBB82_2 +; RV32-NEXT: bltu a2, a3, .LBB83_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB82_2: +; RV32-NEXT: .LBB83_2: ; RV32-NEXT: li a3, 0 ; RV32-NEXT: vzext.vf2 v24, v8 ; RV32-NEXT: vsll.vi v8, v16, 3 @@ -2064,10 +2082,10 @@ ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: bltu a2, a1, .LBB82_4 +; RV32-NEXT: bltu a2, a1, .LBB83_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB82_4: +; RV32-NEXT: .LBB83_4: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, 
mu ; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu @@ -2116,10 +2134,10 @@ ; RV64-NEXT: li a3, 16 ; RV64-NEXT: vzext.vf2 v8, v24 ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: bltu a2, a3, .LBB82_2 +; RV64-NEXT: bltu a2, a3, .LBB83_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB82_2: +; RV64-NEXT: .LBB83_2: ; RV64-NEXT: li a3, 0 ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload @@ -2133,10 +2151,10 @@ ; RV64-NEXT: addi a4, a4, 16 ; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t -; RV64-NEXT: bltu a2, a1, .LBB82_4 +; RV64-NEXT: bltu a2, a1, .LBB83_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB82_4: +; RV64-NEXT: .LBB83_4: ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -44,6 +44,26 @@ ret %v } +declare @llvm.vp.gather.nxv2i7.nxv2p0i7(, , i32) + +define @vpgather_nxv2i7( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv2i7: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv2i7: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv2i7.nxv2p0i7( %ptrs, %m, i32 %evl) + ret %v +} + define @vpgather_nxv2i8_sextload_nxv2i16( %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16: ; RV32: # %bb.0: @@ -264,18 +284,18 @@ ; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: sub a4, a1, a2 ; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB12_2 +; RV32-NEXT: bltu a1, a4, .LBB13_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB12_2: +; RV32-NEXT: .LBB13_2: ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu ; RV32-NEXT: vsext.vf4 v24, v10 ; RV32-NEXT: vsetvli zero, a3, e8, m2, ta, mu ; RV32-NEXT: vluxei32.v v18, (a0), v24, v0.t -; RV32-NEXT: bltu a1, a2, .LBB12_4 +; RV32-NEXT: bltu a1, a2, .LBB13_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB12_4: +; RV32-NEXT: .LBB13_4: ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu ; RV32-NEXT: vsext.vf4 v24, v8 ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -292,16 +312,16 @@ ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: li a4, 0 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a1, a6, .LBB12_2 +; RV64-NEXT: bltu a1, a6, .LBB13_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a6 -; RV64-NEXT: .LBB12_2: +; RV64-NEXT: .LBB13_2: ; RV64-NEXT: sub a6, a2, a3 ; RV64-NEXT: mv a7, a4 -; RV64-NEXT: bltu a2, a6, .LBB12_4 +; RV64-NEXT: bltu a2, a6, .LBB13_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a7, a6 -; RV64-NEXT: .LBB12_4: +; RV64-NEXT: .LBB13_4: ; RV64-NEXT: srli a6, a3, 2 ; RV64-NEXT: vsetvli t0, zero, e8, mf2, ta, mu ; RV64-NEXT: vslidedown.vx v13, v12, a6 @@ -312,34 +332,34 @@ ; RV64-NEXT: vsext.vf8 v24, v11 ; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v19, (a0), v24, v0.t -; RV64-NEXT: bltu a1, a5, .LBB12_6 +; RV64-NEXT: bltu a1, a5, .LBB13_6 ; RV64-NEXT: # %bb.5: ; RV64-NEXT: mv a1, a5 -; RV64-NEXT: .LBB12_6: +; RV64-NEXT: .LBB13_6: ; RV64-NEXT: sub a5, a1, a3 -; RV64-NEXT: 
bltu a1, a5, .LBB12_8 +; RV64-NEXT: bltu a1, a5, .LBB13_8 ; RV64-NEXT: # %bb.7: ; RV64-NEXT: mv a4, a5 -; RV64-NEXT: .LBB12_8: +; RV64-NEXT: .LBB13_8: ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: vslidedown.vx v0, v12, a6 ; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: vsetvli zero, a4, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v17, (a0), v24, v0.t -; RV64-NEXT: bltu a1, a3, .LBB12_10 +; RV64-NEXT: bltu a1, a3, .LBB13_10 ; RV64-NEXT: # %bb.9: ; RV64-NEXT: mv a1, a3 -; RV64-NEXT: .LBB12_10: +; RV64-NEXT: .LBB13_10: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t -; RV64-NEXT: bltu a2, a3, .LBB12_12 +; RV64-NEXT: bltu a2, a3, .LBB13_12 ; RV64-NEXT: # %bb.11: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB12_12: +; RV64-NEXT: .LBB13_12: ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v10 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu @@ -2346,16 +2366,16 @@ ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a3, a0, a1 ; RV32-NEXT: vslidedown.vx v0, v0, a4 -; RV32-NEXT: bltu a0, a3, .LBB102_2 +; RV32-NEXT: bltu a0, a3, .LBB103_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a3 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: .LBB103_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t -; RV32-NEXT: bltu a0, a1, .LBB102_4 +; RV32-NEXT: bltu a0, a1, .LBB103_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB102_4: +; RV32-NEXT: .LBB103_4: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t @@ -2371,16 +2391,16 @@ ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a3, a0, a1 ; RV64-NEXT: vslidedown.vx v0, v0, a4 -; RV64-NEXT: bltu a0, a3, .LBB102_2 +; RV64-NEXT: bltu a0, a3, .LBB103_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a3 -; RV64-NEXT: .LBB102_2: +; RV64-NEXT: .LBB103_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t -; RV64-NEXT: bltu a0, a1, .LBB102_4 +; RV64-NEXT: bltu a0, a1, .LBB103_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB102_4: +; RV64-NEXT: .LBB103_4: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t @@ -2399,19 +2419,19 @@ ; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a4, a1, a2 ; RV32-NEXT: vslidedown.vx v0, v0, a5 -; RV32-NEXT: bltu a1, a4, .LBB103_2 +; RV32-NEXT: bltu a1, a4, .LBB104_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a4 -; RV32-NEXT: .LBB103_2: +; RV32-NEXT: .LBB104_2: ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB103_4 +; RV32-NEXT: bltu a1, a2, .LBB104_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB103_4: +; RV32-NEXT: .LBB104_4: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2426,19 +2446,19 @@ ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a4, a1, a2 ; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB103_2 +; RV64-NEXT: bltu a1, a4, .LBB104_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB103_2: +; RV64-NEXT: .LBB104_2: ; 
RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB103_4 +; RV64-NEXT: bltu a1, a2, .LBB104_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB103_4: +; RV64-NEXT: .LBB104_4: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 @@ -2458,10 +2478,10 @@ ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: vsext.vf4 v16, v8 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: bltu a1, a2, .LBB105_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: .LBB105_2: ; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsext.vf4 v24, v10 ; RV32-NEXT: vsll.vi v8, v16, 3 @@ -2473,10 +2493,10 @@ ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a2, a1, a2 ; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a1, a2, .LBB104_4 +; RV32-NEXT: bltu a1, a2, .LBB105_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a2 -; RV32-NEXT: .LBB104_4: +; RV32-NEXT: .LBB105_4: ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu @@ -2496,19 +2516,19 @@ ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a4, a1, a2 ; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB104_2 +; RV64-NEXT: bltu a1, a4, .LBB105_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB104_2: +; RV64-NEXT: .LBB105_2: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB104_4 +; RV64-NEXT: bltu a1, a2, .LBB105_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_4: +; RV64-NEXT: .LBB105_4: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -2528,10 +2548,10 @@ ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: vzext.vf4 v16, v8 ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: bltu a1, a2, .LBB106_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: .LBB105_2: +; RV32-NEXT: .LBB106_2: ; RV32-NEXT: li a4, 0 ; RV32-NEXT: vzext.vf4 v24, v10 ; RV32-NEXT: vsll.vi v8, v16, 3 @@ -2543,10 +2563,10 @@ ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a2, a1, a2 ; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a1, a2, .LBB105_4 +; RV32-NEXT: bltu a1, a2, .LBB106_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a2 -; RV32-NEXT: .LBB105_4: +; RV32-NEXT: .LBB106_4: ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu @@ -2566,19 +2586,19 @@ ; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a4, a1, a2 ; RV64-NEXT: vslidedown.vx v0, v0, a5 -; RV64-NEXT: bltu a1, a4, .LBB105_2 +; RV64-NEXT: bltu a1, a4, .LBB106_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a4 -; RV64-NEXT: .LBB105_2: +; RV64-NEXT: .LBB106_2: ; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu ; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: bltu a1, a2, .LBB105_4 +; RV64-NEXT: bltu a1, a2, .LBB106_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB105_4: +; RV64-NEXT: .LBB106_4: ; 
RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -40,6 +40,24 @@ ret void } +declare void @llvm.vp.scatter.nxv2i7.nxv2p0i7(, , , i32) + +define void @vpscatter_nxv2i7( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv2i7: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv2i7: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv2i7.nxv2p0i7( %val, %ptrs, %m, i32 %evl) + ret void +} + define void @vpscatter_nxv2i16_truncstore_nxv2i8( %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8: ; RV32: # %bb.0: @@ -2080,10 +2098,10 @@ ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB95_2 +; RV32-NEXT: bltu a1, a0, .LBB96_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: .LBB95_2: +; RV32-NEXT: .LBB96_2: ; RV32-NEXT: li a3, 0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t @@ -2091,10 +2109,10 @@ ; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a0, a1, a0 ; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: bltu a1, a0, .LBB95_4 +; RV32-NEXT: bltu a1, a0, .LBB96_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a3, a0 -; RV32-NEXT: .LBB95_4: +; RV32-NEXT: .LBB96_4: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret @@ -2113,10 +2131,10 @@ ; RV64-NEXT: slli a3, a1, 3 ; RV64-NEXT: add a0, a0, a3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB95_2 +; RV64-NEXT: bltu a2, a1, .LBB96_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB95_2: +; RV64-NEXT: .LBB96_2: ; RV64-NEXT: li a4, 0 ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu @@ -2125,10 +2143,10 @@ ; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a0, a2, a1 ; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a0, .LBB95_4 +; RV64-NEXT: bltu a2, a0, .LBB96_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a0 -; RV64-NEXT: .LBB95_4: +; RV64-NEXT: .LBB96_4: ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload @@ -2148,10 +2166,10 @@ ; RV32-NEXT: vl4re16.v v4, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB96_2 +; RV32-NEXT: bltu a2, a1, .LBB97_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB96_2: +; RV32-NEXT: .LBB97_2: ; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, mu ; RV32-NEXT: vsext.vf2 v24, v4 @@ -2162,10 +2180,10 @@ ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a1, a2, a1 ; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB96_4 +; RV32-NEXT: bltu a2, a1, .LBB97_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB96_4: +; RV32-NEXT: .LBB97_4: ; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2175,10 +2193,10 @@ ; RV64-NEXT: vl4re16.v v4, (a1) ; RV64-NEXT: csrr a1, vlenb ; 
RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB96_2 +; RV64-NEXT: bltu a2, a1, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB96_2: +; RV64-NEXT: .LBB97_2: ; RV64-NEXT: li a4, 0 ; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v4 @@ -2189,10 +2207,10 @@ ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 ; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a1, .LBB96_4 +; RV64-NEXT: bltu a2, a1, .LBB97_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB96_4: +; RV64-NEXT: .LBB97_4: ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v8, v6 ; RV64-NEXT: vsll.vi v8, v8, 3 @@ -2224,10 +2242,10 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsext.vf4 v8, v24 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB97_2 +; RV32-NEXT: bltu a2, a1, .LBB98_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB97_2: +; RV32-NEXT: .LBB98_2: ; RV32-NEXT: li a4, 0 ; RV32-NEXT: vsext.vf4 v16, v26 ; RV32-NEXT: vsll.vi v8, v8, 3 @@ -2241,10 +2259,10 @@ ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a1, a2, a1 ; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB97_4 +; RV32-NEXT: bltu a2, a1, .LBB98_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB97_4: +; RV32-NEXT: .LBB98_4: ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v16, 3 ; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu @@ -2281,10 +2299,10 @@ ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsext.vf4 v8, v24 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB97_2 +; RV64-NEXT: bltu a2, a1, .LBB98_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB97_2: +; RV64-NEXT: .LBB98_2: ; RV64-NEXT: li a4, 0 ; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v8, v8, 3 @@ -2296,10 +2314,10 @@ ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 ; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: bltu a2, a1, .LBB97_4 +; RV64-NEXT: bltu a2, a1, .LBB98_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB97_4: +; RV64-NEXT: .LBB98_4: ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu @@ -2340,10 +2358,10 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vzext.vf4 v8, v24 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB98_2 +; RV32-NEXT: bltu a2, a1, .LBB99_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB98_2: +; RV32-NEXT: .LBB99_2: ; RV32-NEXT: li a4, 0 ; RV32-NEXT: vzext.vf4 v16, v26 ; RV32-NEXT: vsll.vi v8, v8, 3 @@ -2357,10 +2375,10 @@ ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV32-NEXT: sub a1, a2, a1 ; RV32-NEXT: vslidedown.vx v0, v0, a3 -; RV32-NEXT: bltu a2, a1, .LBB98_4 +; RV32-NEXT: bltu a2, a1, .LBB99_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB98_4: +; RV32-NEXT: .LBB99_4: ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV32-NEXT: vsll.vi v8, v16, 3 ; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu @@ -2397,10 +2415,10 @@ ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vzext.vf4 v8, v24 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB98_2 +; RV64-NEXT: bltu a2, a1, .LBB99_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB98_2: +; RV64-NEXT: .LBB99_2: ; RV64-NEXT: li a4, 0 ; RV64-NEXT: vzext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v8, v8, 3 @@ -2412,10 +2430,10 @@ ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu ; RV64-NEXT: sub a1, a2, a1 ; RV64-NEXT: vslidedown.vx v0, v0, a3 -; RV64-NEXT: 
bltu a2, a1, .LBB98_4 +; RV64-NEXT: bltu a2, a1, .LBB99_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: mv a4, a1 -; RV64-NEXT: .LBB98_4: +; RV64-NEXT: .LBB99_4: ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsll.vi v8, v16, 3 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu