diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1132,6 +1132,7 @@ SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ MVT XLenVT = Subtarget.getXLenVT();
 unsigned NumElts = Op.getNumOperands();
 if (VT.getVectorElementType() == MVT::i1) { @@ -1167,7 +1168,6 @@ uint64_t Bits = 0; unsigned BitPos = 0, IntegerEltIdx = 0;
- MVT XLenVT = Subtarget.getXLenVT();
 SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
 for (unsigned I = 0; I < NumElts; I++, BitPos++) { @@ -1239,6 +1239,64 @@ } }
+ // Attempt to detect "hidden" splats, which only reveal themselves as splats
+ // when re-interpreted as a vector with a larger element type. For example,
+ // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
+ // could instead be splat as
+ // v2i32 = build_vector i32 0x00010000, i32 0x00010000
+ // TODO: This optimization could also work on non-constant splats, but it
+ // would require bit-manipulation instructions to construct the splat value.
+ SmallVector<SDValue> Sequence;
+ unsigned EltBitSize = VT.getScalarSizeInBits();
+ const auto *BV = cast<BuildVectorSDNode>(Op);
+ if (VT.isInteger() && EltBitSize < 64 &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+ BV->getRepeatedSequence(Sequence) &&
+ (Sequence.size() * EltBitSize) <= 64) {
+ unsigned SeqLen = Sequence.size();
+ MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
+ MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
+ assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
+ ViaIntVT == MVT::i64) &&
+ "Unexpected sequence type");
+
+ unsigned EltIdx = 0;
+ uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
+ uint64_t SplatValue = 0;
+ // Construct the amalgamated value which can be splatted as this larger
+ // vector type.
+ for (const auto &SeqV : Sequence) {
+ if (!SeqV.isUndef())
+ SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
+ << (EltIdx * EltBitSize));
+ EltIdx++;
+ }
+
+ // On RV64, sign-extend from 32 to 64 bits where possible in order to
+ // achieve better constant materialization.
+ if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
+ SplatValue = SignExtend64(SplatValue, 32);
+
+ // Since we can't introduce illegal i64 types at this stage, we can only
+ // perform an i64 splat on RV32 if it is its own sign-extended value. That
+ // way we can use RVV instructions to splat.
+ assert((ViaIntVT.bitsLE(XLenVT) ||
+ (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
+ "Unexpected bitcast sequence");
+ if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
+ SDValue ViaVL =
+ DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
+ MVT ViaContainerVT =
+ RISCVTargetLowering::getContainerForFixedLengthVector(DAG, ViaVecVT,
+ Subtarget);
+ SDValue Splat =
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
+ DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
+ Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
+ return DAG.getBitcast(VT, Splat);
+ }
+ }
+ // Try and optimize BUILD_VECTORs with "dominant values" - these are values // which constitute a large proportion of the elements. In such cases we can // splat a vector with the dominant element and make up the shortfall with @@ -1270,7 +1328,6 @@ } assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
- MVT XLenVT = Subtarget.getXLenVT();
 unsigned NumDefElts = NumElts - NumUndefElts; unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 
0 : NumDefElts - 2; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -368,127 +368,99 @@ ; LMULMAX2-RV32-LABEL: bitreverse_v2i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 5 -; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v25, 0 -; LMULMAX2-RV32-NEXT: addi a1, zero, 24 -; LMULMAX2-RV32-NEXT: vmerge.vxm v27, v25, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v27 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v29, v25, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v29 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v29, v25, 8, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v29 -; LMULMAX2-RV32-NEXT: lui a2, 1044480 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v31, v25, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v31 -; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX2-RV32-NEXT: addi a6, zero, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 ; LMULMAX2-RV32-NEXT: addi a2, zero, 40 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v25, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v31, v26, v30 -; LMULMAX2-RV32-NEXT: lui a2, 16 -; LMULMAX2-RV32-NEXT: addi a2, a2, -256 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v25, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v31, v31, v8 -; LMULMAX2-RV32-NEXT: addi a3, zero, 56 -; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v25, a3, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v9, v26, v8 -; LMULMAX2-RV32-NEXT: vor.vv v31, v31, v9 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v31 -; LMULMAX2-RV32-NEXT: vsll.vv v29, v26, v29 -; LMULMAX2-RV32-NEXT: addi a3, zero, 255 +; LMULMAX2-RV32-NEXT: vsrl.vx v27, v25, a2 +; LMULMAX2-RV32-NEXT: lui a3, 16 +; LMULMAX2-RV32-NEXT: addi a3, a3, -256 +; LMULMAX2-RV32-NEXT: vand.vx v27, v27, a3 +; LMULMAX2-RV32-NEXT: vor.vv v26, v27, v26 +; LMULMAX2-RV32-NEXT: addi a4, zero, 5 +; LMULMAX2-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v31, a3 -; LMULMAX2-RV32-NEXT: vmerge.vim v31, v31, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v29, v29, v31 -; LMULMAX2-RV32-NEXT: vsll.vv v27, v26, v27 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v31, a2 -; LMULMAX2-RV32-NEXT: vmerge.vim v31, v31, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v31 -; LMULMAX2-RV32-NEXT: vor.vv v27, v27, v29 -; LMULMAX2-RV32-NEXT: 
vsll.vv v29, v26, v30 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v27, 0 +; LMULMAX2-RV32-NEXT: lui a4, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v27, v27, a4, v0 +; LMULMAX2-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vi v28, v25, 8 +; LMULMAX2-RV32-NEXT: vand.vv v27, v28, v27 +; LMULMAX2-RV32-NEXT: vsrl.vi v28, v25, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v28, v28, a4 +; LMULMAX2-RV32-NEXT: vor.vv v27, v27, v28 +; LMULMAX2-RV32-NEXT: vor.vv v26, v27, v26 +; LMULMAX2-RV32-NEXT: addi a5, zero, 255 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a5 +; LMULMAX2-RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v29, v29, v30 -; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v29 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsll.vi v28, v25, 8 +; LMULMAX2-RV32-NEXT: vand.vv v27, v28, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a3 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsll.vi v29, v25, 24 +; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX2-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a4 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsll.vx v29, v25, a2 +; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX2-RV32-NEXT: vsll.vx v25, v25, a6 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v28 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v25, 4, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v28 +; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX2-RV32-NEXT: vsll.vi v26, v26, 4 ; LMULMAX2-RV32-NEXT: lui a1, 986895 ; LMULMAX2-RV32-NEXT: addi a1, a1, 240 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v29, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v29 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vsrl.vi v25, v25, 4 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v25, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; 
LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v28 +; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX2-RV32-NEXT: vsll.vi v26, v26, 2 ; LMULMAX2-RV32-NEXT: lui a1, 838861 ; LMULMAX2-RV32-NEXT: addi a1, a1, -820 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v29, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v29 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vsrl.vi v25, v25, 2 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v25 +; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX2-RV32-NEXT: vsll.vi v26, v26, 1 ; LMULMAX2-RV32-NEXT: lui a1, 699051 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v26, v25 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vsrl.vi v25, v25, 1 +; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -596,127 +568,99 @@ ; LMULMAX1-RV32-LABEL: bitreverse_v2i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX1-RV32-NEXT: addi a1, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v25, 0 -; LMULMAX1-RV32-NEXT: addi a1, zero, 24 -; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v25, a1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v28, v26, v27 -; LMULMAX1-RV32-NEXT: lui a1, 4080 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v29, v25, a1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v29, v25, 8, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v30, v26, v29 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v31, v25, a2, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v30, v30, v31 -; LMULMAX1-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a6, zero, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 ; LMULMAX1-RV32-NEXT: addi a2, zero, 40 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v30, v25, a2, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v31, v26, v30 -; LMULMAX1-RV32-NEXT: lui a2, 16 -; 
LMULMAX1-RV32-NEXT: addi a2, a2, -256 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v25, a2, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v8 -; LMULMAX1-RV32-NEXT: addi a3, zero, 56 -; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v25, a3, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v26, v8 -; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v9 -; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v31 -; LMULMAX1-RV32-NEXT: vsll.vv v29, v26, v29 -; LMULMAX1-RV32-NEXT: addi a3, zero, 255 +; LMULMAX1-RV32-NEXT: vsrl.vx v27, v25, a2 +; LMULMAX1-RV32-NEXT: lui a3, 16 +; LMULMAX1-RV32-NEXT: addi a3, a3, -256 +; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a3 +; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26 +; LMULMAX1-RV32-NEXT: addi a4, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a4 ; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v31, a3 -; LMULMAX1-RV32-NEXT: vmerge.vim v31, v31, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v31 -; LMULMAX1-RV32-NEXT: vsll.vv v27, v26, v27 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v31, a2 -; LMULMAX1-RV32-NEXT: vmerge.vim v31, v31, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v31 -; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v29 -; LMULMAX1-RV32-NEXT: vsll.vv v29, v26, v30 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX1-RV32-NEXT: vmerge.vim v30, v30, 0, v0 +; LMULMAX1-RV32-NEXT: vmv.v.i v27, 0 +; LMULMAX1-RV32-NEXT: lui a4, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a4, v0 +; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 24 +; LMULMAX1-RV32-NEXT: lui a4, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v28, v28, a4 +; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26 +; LMULMAX1-RV32-NEXT: addi a5, zero, 255 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a5 +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v30 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v8 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v29 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vsll.vi v28, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a3 +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsll.vi v29, v25, 24 +; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a4 +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsll.vx v29, v25, a2 +; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28 +; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; 
LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a1, a1, -241 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v28, v25, 4, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 4 ; LMULMAX1-RV32-NEXT: lui a1, 986895 ; LMULMAX1-RV32-NEXT: addi a1, a1, 240 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v29, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v29 -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v28 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 4 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v28, v25, 2, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 2 ; LMULMAX1-RV32-NEXT: lui a1, 838861 ; LMULMAX1-RV32-NEXT: addi a1, a1, -820 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v29, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v29 -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v28 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v25, v25, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v25 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: lui a1, 699051 ; LMULMAX1-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v26, v25 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 1 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -1288,127 +1232,99 @@ ; LMULMAX2-RV32-LABEL: bitreverse_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; 
LMULMAX2-RV32-NEXT: vle64.v v28, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 85 -; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX2-RV32-NEXT: addi a1, zero, 24 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v26, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v28, v30 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v26, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v26, 8, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v12, v28, v10 -; LMULMAX2-RV32-NEXT: lui a2, 1044480 -; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v26, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v8 +; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) +; LMULMAX2-RV32-NEXT: addi a6, zero, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 ; LMULMAX2-RV32-NEXT: addi a2, zero, 40 -; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v26, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v14, v28, v8 -; LMULMAX2-RV32-NEXT: lui a2, 16 -; LMULMAX2-RV32-NEXT: addi a2, a2, -256 -; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v26, a2, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v16 -; LMULMAX2-RV32-NEXT: addi a3, zero, 56 -; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v26, a3, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v18, v28, v16 -; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v18 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: vsll.vv v10, v28, v10 -; LMULMAX2-RV32-NEXT: addi a3, zero, 255 +; LMULMAX2-RV32-NEXT: vsrl.vx v30, v26, a2 +; LMULMAX2-RV32-NEXT: lui a3, 16 +; LMULMAX2-RV32-NEXT: addi a3, a3, -256 +; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a3 +; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: addi a4, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v14, a3 -; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: vsll.vv v30, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v14, a2 -; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v14 -; LMULMAX2-RV32-NEXT: vor.vv v30, v30, v10 -; LMULMAX2-RV32-NEXT: vsll.vv v8, v28, v8 -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV32-NEXT: lui a4, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a4, v0 +; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 8 +; LMULMAX2-RV32-NEXT: vand.vv 
v30, v8, v30 +; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX2-RV32-NEXT: vor.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: addi a5, zero, 255 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a5 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsll.vv v28, v28, v16 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v8 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v26, 8 +; LMULMAX2-RV32-NEXT: vand.vv v30, v8, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v8, a3 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v8, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsll.vi v10, v26, 24 +; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v8, a4 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v8, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsll.vx v10, v26, a2 +; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8 +; LMULMAX2-RV32-NEXT: vsll.vx v26, v26, a6 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v8 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v26, 4, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: vsll.vi v28, v28, 4 ; LMULMAX2-RV32-NEXT: lui a1, 986895 ; LMULMAX2-RV32-NEXT: addi a1, a1, 240 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v10 -; LMULMAX2-RV32-NEXT: vsrl.vv v28, v28, v8 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v26, 4 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v26, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: vsll.vi v28, v28, 2 ; LMULMAX2-RV32-NEXT: lui a1, 838861 ; LMULMAX2-RV32-NEXT: addi a1, a1, -820 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v10 -; LMULMAX2-RV32-NEXT: 
vsrl.vv v28, v28, v8 -; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v26, 2 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v26, v26, 1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v26 +; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: vsll.vi v28, v28, 1 ; LMULMAX2-RV32-NEXT: lui a1, 699051 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v28, v26 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v26, 1 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -1516,167 +1432,139 @@ ; LMULMAX1-RV32-LABEL: bitreverse_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v13, (a1) -; LMULMAX1-RV32-NEXT: addi a2, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vle64.v v30, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a6, zero, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v30, a6 +; LMULMAX1-RV32-NEXT: addi a7, zero, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v27, v30, a7 +; LMULMAX1-RV32-NEXT: lui a4, 16 +; LMULMAX1-RV32-NEXT: addi a4, a4, -256 +; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4 +; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v26 +; LMULMAX1-RV32-NEXT: addi a5, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX1-RV32-NEXT: addi a2, zero, 24 -; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v30, a2, v0 +; LMULMAX1-RV32-NEXT: vmv.v.i v26, 0 +; LMULMAX1-RV32-NEXT: lui a2, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v26, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v13, v26 -; LMULMAX1-RV32-NEXT: lui a2, 4080 +; LMULMAX1-RV32-NEXT: vsrl.vi v28, v30, 8 +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v26 +; LMULMAX1-RV32-NEXT: vsrl.vi v29, v30, 24 +; LMULMAX1-RV32-NEXT: lui a5, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v29, v29, a5 +; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v29 +; LMULMAX1-RV32-NEXT: vor.vv v31, v28, v27 +; LMULMAX1-RV32-NEXT: addi a2, zero, 255 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v28, v30, a2, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v29, v27, v28 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v27, v30, 8, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v13, v27 -; LMULMAX1-RV32-NEXT: lui a3, 1044480 -; LMULMAX1-RV32-NEXT: 
vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v31, v30, a3, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v31 -; LMULMAX1-RV32-NEXT: vor.vv v10, v8, v29 -; LMULMAX1-RV32-NEXT: addi a3, zero, 40 -; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v29, v30, a3, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v13, v29 -; LMULMAX1-RV32-NEXT: lui a3, 16 -; LMULMAX1-RV32-NEXT: addi a3, a3, -256 -; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v30, a3, v0 -; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v11, v8, v9 -; LMULMAX1-RV32-NEXT: addi a4, zero, 56 -; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v30, a4, v0 -; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v12, v13, v8 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vor.vv v14, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vv v11, v13, v27 -; LMULMAX1-RV32-NEXT: addi a4, zero, 255 -; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a4 -; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v12, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v15, v13, v26 -; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a3 -; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v11 -; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v12 -; LMULMAX1-RV32-NEXT: vsll.vv v16, v13, v29 -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 -; LMULMAX1-RV32-NEXT: vmerge.vim v12, v12, 0, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2 +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v12 -; LMULMAX1-RV32-NEXT: vsll.vv v13, v13, v8 -; LMULMAX1-RV32-NEXT: vor.vv v13, v13, v16 -; LMULMAX1-RV32-NEXT: vor.vv v13, v13, v15 -; LMULMAX1-RV32-NEXT: vor.vv v16, v13, v14 +; LMULMAX1-RV32-NEXT: vsll.vi v28, v30, 8 +; LMULMAX1-RV32-NEXT: vand.vv v29, v28, v27 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a4 +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsll.vi v8, v30, 24 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v28 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v29 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v29, a5 +; LMULMAX1-RV32-NEXT: vmerge.vim v29, v29, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsll.vx v9, v30, a7 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v29 +; LMULMAX1-RV32-NEXT: vsll.vx v30, v30, a6 +; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v9 +; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v8 +; LMULMAX1-RV32-NEXT: vor.vv v31, v30, v31 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v15, v16, v13 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v14, v30, 4, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2 ; LMULMAX1-RV32-NEXT: vsetivli 
a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v17, v15, v14 +; LMULMAX1-RV32-NEXT: vand.vv v8, v31, v30 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV32-NEXT: lui a2, 986895 ; LMULMAX1-RV32-NEXT: addi a2, a2, 240 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v15, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v15 -; LMULMAX1-RV32-NEXT: vsrl.vv v16, v16, v14 -; LMULMAX1-RV32-NEXT: vor.vv v18, v16, v17 +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v31, v31, 4 +; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v8 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v19, v18, v16 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v17, v30, 2, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v19, v19, v17 +; LMULMAX1-RV32-NEXT: vand.vv v10, v31, v8 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 2 ; LMULMAX1-RV32-NEXT: lui a2, 838861 ; LMULMAX1-RV32-NEXT: addi a2, a2, -820 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v20, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v18, v18, v20 -; LMULMAX1-RV32-NEXT: vsrl.vv v18, v18, v17 -; LMULMAX1-RV32-NEXT: vor.vv v18, v18, v19 +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v31, v31, 2 +; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v10 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v19, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v21, v18, v19 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v30, v30, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v21, v21, v30 +; LMULMAX1-RV32-NEXT: vand.vv v12, v31, v10 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 1 ; LMULMAX1-RV32-NEXT: lui a2, 699051 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1366 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v22, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v18, v18, v22 -; LMULMAX1-RV32-NEXT: vsrl.vv v18, v18, v30 -; LMULMAX1-RV32-NEXT: vor.vv v18, v18, v21 -; LMULMAX1-RV32-NEXT: vsrl.vv v21, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v28, v21, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v21, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v31, v21, v31 -; LMULMAX1-RV32-NEXT: vor.vv v28, v31, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v31, v25, v29 -; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v9 -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v25, v8 -; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v9 -; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v31 -; LMULMAX1-RV32-NEXT: vsll.vv v27, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v11 -; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 -; LMULMAX1-RV32-NEXT: vsll.vv v27, v25, v29 -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v12 -; LMULMAX1-RV32-NEXT: vsll.vv v25, 
v25, v8 +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v13 +; LMULMAX1-RV32-NEXT: vsrl.vi v31, v31, 1 +; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v12 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v25, a6 +; LMULMAX1-RV32-NEXT: vsrl.vx v14, v25, a7 +; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a4 +; LMULMAX1-RV32-NEXT: vor.vv v12, v14, v12 +; LMULMAX1-RV32-NEXT: vsrl.vi v14, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v26, v14, v26 +; LMULMAX1-RV32-NEXT: vsrl.vi v14, v25, 24 +; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a5 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v14 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v12 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 8 +; LMULMAX1-RV32-NEXT: vand.vv v27, v12, v27 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 24 +; LMULMAX1-RV32-NEXT: vand.vv v28, v12, v28 +; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27 +; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a7 +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 +; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v13 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v14 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v15 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v14 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v30 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 4 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 4 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v16 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v17 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v20 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v17 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v8 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 2 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v19 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v30 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v22 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v30 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v10 +; LMULMAX1-RV32-NEXT: vsll.vi v26, v26, 1 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v13 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v18, (a1) +; LMULMAX1-RV32-NEXT: vse64.v v31, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -264,55 +264,39 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 5 -; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v25, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v28 -; 
LMULMAX2-RV32-NEXT: vsub.vv v25, v25, v27 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vsub.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v28 -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v27 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 4, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 -; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vand.vv v27, v25, v26 +; LMULMAX2-RV32-NEXT: vsrl.vi v25, v25, 2 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vadd.vv v25, v27, v25 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v25, 4 +; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmul.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vmul.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: addi a1, zero, 56 -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a1 ; LMULMAX2-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -370,55 +354,39 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi a1, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 1 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v28 -; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 2, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: 
addi a1, a1, 819 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v28 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 4, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v27, v25, v26 +; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v27, v25 +; LMULMAX1-RV32-NEXT: vsrl.vi v26, v25, 4 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a1, a1, -241 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: lui a1, 4112 ; LMULMAX1-RV32-NEXT: addi a1, a1, 257 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: addi a1, zero, 56 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a1 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -831,56 +799,40 @@ ; LMULMAX2-RV32-LABEL: ctpop_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle64.v v28, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 85 -; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) +; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v8 -; LMULMAX2-RV32-NEXT: vsub.vv v28, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 2, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v8 -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v8 -; LMULMAX2-RV32-NEXT: vadd.vv v28, v28, v30 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; 
LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 4, v0 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 -; LMULMAX2-RV32-NEXT: vadd.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vand.vv v30, v26, v28 +; LMULMAX2-RV32-NEXT: vsrl.vi v26, v26, 2 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v30, v26 +; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 4 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmul.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vmul.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: addi a1, zero, 56 -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v28, v26 +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a1 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -937,72 +889,56 @@ ; LMULMAX1-RV32-LABEL: ctpop_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v26, (a1) -; LMULMAX1-RV32-NEXT: addi a2, zero, 5 -; LMULMAX1-RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.i v27, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v28, v27, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v29, v26, v28 +; LMULMAX1-RV32-NEXT: vle64.v v25, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v26, (a0) +; LMULMAX1-RV32-NEXT: vsrl.vi v27, v25, 1 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v30 -; LMULMAX1-RV32-NEXT: vsub.vv v26, v26, v29 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v29, v27, 2, v0 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v31, v26, v29 +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v8 -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v8 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v31 -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmerge.vim v31, v27, 4, v0 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v26, v31 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v9 +; 
LMULMAX1-RV32-NEXT: vand.vv v29, v25, v27
+; LMULMAX1-RV32-NEXT: vsrl.vi v25, v25, 2
+; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v29, v25
+; LMULMAX1-RV32-NEXT: vsrl.vi v29, v25, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v29
 ; LMULMAX1-RV32-NEXT: lui a2, 61681
 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241
 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2
+; LMULMAX1-RV32-NEXT: vmv.v.x v29, a2
 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v9
+; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v29
 ; LMULMAX1-RV32-NEXT: lui a2, 4112
 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257
 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2
+; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2
 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmul.vv v26, v26, v10
+; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v30
 ; LMULMAX1-RV32-NEXT: addi a2, zero, 56
-; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a2, v0
-; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v27
-; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v28
-; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v30
-; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v29
-; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v8
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v8
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v31
-; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28
-; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v9
-; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v10
-; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT: vse64.v v26, (a1)
+; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a2
+; LMULMAX1-RV32-NEXT: vsrl.vi v31, v26, 1
+; LMULMAX1-RV32-NEXT: vand.vv v28, v31, v28
+; LMULMAX1-RV32-NEXT: vsub.vv v26, v26, v28
+; LMULMAX1-RV32-NEXT: vand.vv v28, v26, v27
+; LMULMAX1-RV32-NEXT: vsrl.vi v26, v26, 2
+; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vadd.vv v26, v28, v26
+; LMULMAX1-RV32-NEXT: vsrl.vi v27, v26, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v29
+; LMULMAX1-RV32-NEXT: vmul.vv v26, v26, v30
+; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a2
+; LMULMAX1-RV32-NEXT: vse64.v v26, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v25, (a1)
 ; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ctpop_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -173,3 +173,139 @@ store <2 x i64> , <2 x i64>* %x ret void }
+
+define void @buildvec_seq_v8i8_v4i16(<8 x i8>* %x) {
+; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 513
+; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 1, i8 2>, <8 x i8>* %x
+ ret void
+}
+
+define void @buildvec_seq_v8i8_v2i32(<8 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v8i8_v2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 48
+; RV32-NEXT: addi a1, a1, 513
+; RV32-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v25, a1
+; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT: vse8.v v25, (a0)
+; RV32-NEXT: 
ret
+;
+; RV64-LABEL: buildvec_seq_v8i8_v2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 48
+; RV64-NEXT: addiw a1, a1, 513
+; RV64-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; RV64-NEXT: vmv.v.x v25, a1
+; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; RV64-NEXT: vse8.v v25, (a0)
+; RV64-NEXT: ret
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, <8 x i8>* %x
+ ret void
+}
+
+define void @buildvec_seq_v16i8_v2i64(<16 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v16i8_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, %hi(.LCPI14_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI14_0)
+; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; RV32-NEXT: vle8.v v25, (a1)
+; RV32-NEXT: vse8.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_seq_v16i8_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 32880
+; RV64-NEXT: addiw a1, a1, 1541
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 1027
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 513
+; RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.x v25, a1
+; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; RV64-NEXT: vse8.v v25, (a0)
+; RV64-NEXT: ret
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>* %x
+ ret void
+}
+
+define void @buildvec_seq2_v16i8_v2i64(<16 x i8>* %x) {
+; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 528432
+; RV32-NEXT: addi a1, a1, 513
+; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; RV32-NEXT: vmv.v.x v25, a1
+; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; RV32-NEXT: vse8.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 528432
+; RV64-NEXT: addiw a1, a1, 513
+; RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; RV64-NEXT: vmv.v.x v25, a1
+; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; RV64-NEXT: vse8.v v25, (a0)
+; RV64-NEXT: ret
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* %x
+ ret void
+}
+
+define void @buildvec_seq_v9i8(<9 x i8>* %x) {
+; RV32-LABEL: buildvec_seq_v9i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, 73
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT: vmv.v.i v25, 2
+; RV32-NEXT: vmerge.vim v25, v25, 1, v0
+; RV32-NEXT: addi a1, zero, 36
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; RV32-NEXT: vmerge.vim v25, v25, 3, v0
+; RV32-NEXT: vse8.v v25, (a0)
+; RV32-NEXT: addi a1, zero, 3
+; RV32-NEXT: sb a1, 8(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_seq_v9i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, zero, 3
+; RV64-NEXT: sb a1, 8(a0)
+; RV64-NEXT: lui a1, 4104
+; RV64-NEXT: addiw a1, a1, 385
+; RV64-NEXT: slli a1, a1, 17
+; RV64-NEXT: addi a1, a1, 259
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: addi a1, a1, 513
+; RV64-NEXT: sd a1, 0(a0)
+; RV64-NEXT: ret
+ store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, <9 x i8>* %x
+ ret void
+}
+
+define void @buildvec_seq_v4i16_v2i32(<4 x i16>* %x) {
+; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, -127
+; CHECK-NEXT: vsetivli a2, 2, e32,m1,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, <4 x i16>* %x
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1184,16 +1184,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; RV64-NEXT: vle32.v v25, (a0)
-; RV64-NEXT: addi a1, zero, 5
-; RV64-NEXT: vsetivli a2, 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1184,16 +1184,17 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; RV64-NEXT: vle32.v v25, (a0) -; RV64-NEXT: addi a1, zero, 5 -; RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: lui a1, 419430 -; RV64-NEXT: addiw a1, a1, 1639 -; RV64-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; RV64-NEXT: lui a1, 13107 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, 973 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: slli a1, a1, 13 +; RV64-NEXT: addi a1, a1, -1639 +; RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; RV64-NEXT: vmv.v.x v26, a1 -; RV64-NEXT: lui a1, 629146 -; RV64-NEXT: addiw a1, a1, -1639 -; RV64-NEXT: vmerge.vxm v26, v26, a1, v0 +; RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; RV64-NEXT: vmulh.vv v25, v25, v26 ; RV64-NEXT: vsra.vi v25, v25, 1 ; RV64-NEXT: vsrl.vi v26, v25, 31 @@ -1229,22 +1230,16 @@ ; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; RV32-NEXT: vmul.vv v25, v25, v27 ; RV32-NEXT: vadd.vv v25, v26, v25 -; RV32-NEXT: addi a2, zero, 5 -; RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 ; RV32-NEXT: addi a2, zero, 63 -; RV32-NEXT: vmerge.vxm v27, v26, a2, v0 -; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v27, v25, v27 +; RV32-NEXT: vsrl.vx v26, v25, a2 ; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.s.x v28, a1 +; RV32-NEXT: vmv.s.x v27, a1 +; RV32-NEXT: vmv.v.i v28, 0 ; RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu -; RV32-NEXT: vslideup.vi v26, v28, 2 +; RV32-NEXT: vslideup.vi v28, v27, 2 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v25, v25, v26 -; RV32-NEXT: vadd.vv v25, v25, v27 +; RV32-NEXT: vsra.vv v25, v25, v28 +; RV32-NEXT: vadd.vv v25, v25, v26 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret ; @@ -4622,16 +4617,17 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle32.v v26, (a0) -; LMULMAX2-RV64-NEXT: addi a1, zero, 85 -; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, 419430 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1639 -; LMULMAX2-RV64-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX2-RV64-NEXT: lui a1, 13107 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: slli a1, a1, 12 +; LMULMAX2-RV64-NEXT: addi a1, a1, 973 +; LMULMAX2-RV64-NEXT: slli a1, a1, 12 +; LMULMAX2-RV64-NEXT: addi a1, a1, -819 +; LMULMAX2-RV64-NEXT: slli a1, a1, 13 +; LMULMAX2-RV64-NEXT: addi a1, a1, -1639 +; LMULMAX2-RV64-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; LMULMAX2-RV64-NEXT: vmv.v.x v28, a1 -; LMULMAX2-RV64-NEXT: lui a1, 629146 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -1639 -; LMULMAX2-RV64-NEXT: vmerge.vxm v28, v28, a1, v0 +; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: vsra.vi v26, v26, 1 ; LMULMAX2-RV64-NEXT: vsrl.vi v28, v26, 31 @@ -4673,12 +4669,12 @@ ; LMULMAX1-RV64-NEXT: vle32.v v25, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v26, (a1) -; LMULMAX1-RV64-NEXT: addi a2, zero, 5 -; LMULMAX1-RV64-NEXT: vsetivli a3, 1, e8,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV64-NEXT: addi a2, zero, 3 +; LMULMAX1-RV64-NEXT: slli a2, a2, 33 +; LMULMAX1-RV64-NEXT: addi a2, a2, -5 +; LMULMAX1-RV64-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.v.x v27, a2 ; LMULMAX1-RV64-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV64-NEXT: vmv.v.i v27, 5 -; LMULMAX1-RV64-NEXT: vmerge.vim v27, v27, -5, v0 ; LMULMAX1-RV64-NEXT: vdiv.vv v26, v26, v27 ; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v27 ; LMULMAX1-RV64-NEXT:
vse32.v v25, (a0) @@ -4715,23 +4711,17 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30 ; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: addi a1, zero, 85 -; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0 ; LMULMAX2-RV32-NEXT: addi a1, zero, 63 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v28, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a1 ; LMULMAX2-RV32-NEXT: addi a1, zero, 68 ; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -5707,28 +5697,13 @@ } define void @add_iv_v2i64(<2 x i64>* %x) { -; RV32-LABEL: add_iv_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vadd.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: add_iv_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vadd.vi v25, v25, 1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: add_iv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, 1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -5961,28 +5936,13 @@ } define void @sub_iv_v2i64(<2 x i64>* %x) { -; RV32-LABEL: sub_iv_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsub.vv v25, v26, v25 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sub_iv_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vrsub.vi v25, v25, 1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sub_iv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vrsub.vi v25, v25, 1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6232,28 +6192,13 @@ } define void @and_vi_v2i64(<2 x i64>* %x) { 
-; RV32-LABEL: and_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, -1 -; RV32-NEXT: vmerge.vim v26, v26, -2, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vand.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: and_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vand.vi v25, v25, -2 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: and_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, -2 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 -2, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6311,28 +6256,13 @@ } define void @and_iv_v2i64(<2 x i64>* %x) { -; RV32-LABEL: and_iv_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vand.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: and_iv_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vand.vi v25, v25, 1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: and_iv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, 1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6486,28 +6416,13 @@ } define void @or_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: or_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, -1 -; RV32-NEXT: vmerge.vim v26, v26, -2, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vor.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: or_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vor.vi v25, v25, -2 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: or_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, -2 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 -2, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6565,28 +6480,13 @@ } define void @or_iv_v2i64(<2 x i64>* %x) { -; RV32-LABEL: or_iv_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; 
RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vor.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: or_iv_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vor.vi v25, v25, 1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: or_iv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, 1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6815,28 +6715,13 @@ } define void @xor_iv_v2i64(<2 x i64>* %x) { -; RV32-LABEL: xor_iv_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vxor.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: xor_iv_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vxor.vi v25, v25, 1 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: xor_iv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, 1 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -6990,29 +6875,13 @@ } define void @lshr_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: lshr_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: addi a1, zero, 31 -; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: lshr_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vsrl.vi v25, v25, 31 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: lshr_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsrl.vi v25, v25, 31 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 31, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -7118,29 +6987,13 @@ } define void @ashr_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: ashr_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: addi a1, zero, 31 -; RV32-NEXT: vmerge.vxm v26, v26, 
a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: ashr_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vsra.vi v25, v25, 31 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: ashr_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsra.vi v25, v25, 31 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 31, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -7246,29 +7099,13 @@ } define void @shl_vi_v2i64(<2 x i64>* %x) { -; RV32-LABEL: shl_vi_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: addi a1, zero, 5 -; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: addi a1, zero, 31 -; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsll.vv v25, v25, v26 -; RV32-NEXT: vse64.v v25, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: shl_vi_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV64-NEXT: vle64.v v25, (a0) -; RV64-NEXT: vsll.vi v25, v25, 31 -; RV64-NEXT: vse64.v v25, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: shl_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vsll.vi v25, v25, 31 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> undef, i64 31, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer @@ -7611,11 +7448,7 @@ ; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vmulhu.vv v25, v25, v26 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 -; RV32-NEXT: vmerge.vim v26, v26, 1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v25, v25, v26 +; RV32-NEXT: vsrl.vi v25, v25, 1 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret ; @@ -7737,12 +7570,8 @@ ; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vmulh.vv v25, v25, v26 -; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.i v26, 0 ; RV32-NEXT: addi a1, zero, 63 -; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v26, v25, v26 +; RV32-NEXT: vsrl.vx v26, v25, a1 ; RV32-NEXT: vadd.vv v25, v25, v26 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1035,20 +1035,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; 
RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1071,20 +1061,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1132,20 +1112,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1168,20 +1138,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1228,20 +1188,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; 
RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1264,20 +1214,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1300,18 +1240,9 @@ define <8 x i64> @mgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32-LABEL: mgather_baseidx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vmerge.vim v28, v28, 3, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v8, v28 +; RV32-NEXT: vsll.vi v28, v8, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -1973,20 +1904,10 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2009,20 +1930,10 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2070,20 +1981,10 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: 
vsext.vf4 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2106,20 +2007,10 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2166,20 +2057,10 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2202,20 +2083,10 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v8 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v8 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret @@ -2238,18 +2109,9 @@ define <8 x double> @mgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) { ; RV32-LABEL: mgather_baseidx_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vmerge.vim v28, v28, 3, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv 
v28, v8, v28 +; RV32-NEXT: vsll.vi v28, v8, 3 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -857,20 +857,10 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -891,20 +881,10 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -948,20 +928,10 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -982,20 +952,10 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v 
v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1038,20 +998,10 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1072,20 +1022,10 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1106,18 +1046,9 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vmerge.vim v28, v28, 3, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v12, v28 +; RV32-NEXT: vsll.vi v28, v12, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1723,20 +1654,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf8 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1757,20 +1678,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf8 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; 
RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1814,20 +1725,10 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf4 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1848,20 +1749,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf4 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1904,20 +1795,10 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vsext.vf2 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1938,20 +1819,10 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu ; RV32-NEXT: vzext.vf2 v28, v12 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmerge.vim v12, v12, 3, v0 -; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v28, v12 +; RV32-NEXT: vsll.vi v28, v28, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, 
v0.t ; RV32-NEXT: ret ; @@ -1972,18 +1843,9 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m) { ; RV32-LABEL: mscatter_baseidx_v8f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v25, v0 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu -; RV32-NEXT: vmv.v.i v28, 0 -; RV32-NEXT: vmerge.vim v28, v28, 3, v0 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu -; RV32-NEXT: vsll.vv v28, v12, v28 +; RV32-NEXT: vsll.vi v28, v12, 3 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu -; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t ; RV32-NEXT: ret ;