diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1248,14 +1248,10 @@
   // "insert" the upper element, and an insert of the lower element at position
   // 0, which improves codegen.
   SDValue DominantValue;
+  unsigned MostCommonCount = 0;
   DenseMap<SDValue, unsigned> ValueCounts;
-  // Use a fairly conservative threshold. A future optimization could be to use
-  // multiple vmerge.vi/vmerge.vx instructions on "partially-dominant"
-  // elements with more relaxed thresholds.
   unsigned NumUndefElts =
       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
-  unsigned NumDefElts = NumElts - NumUndefElts;
-  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
 
   for (SDValue V : Op->op_values()) {
     if (V.isUndef())
@@ -1264,22 +1260,48 @@
     ValueCounts.insert(std::make_pair(V, 0));
     unsigned &Count = ValueCounts[V];
 
-    // Is this value dominant?
-    if (++Count > DominantValueCountThreshold)
+    // Is this value dominant? In case of a tie, prefer the highest element as
+    // it's cheaper to insert near the beginning of a vector than it is at the
+    // end.
+    if (++Count >= MostCommonCount) {
       DominantValue = V;
+      MostCommonCount = Count;
+    }
   }
 
+  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned NumDefElts = NumElts - NumUndefElts;
+  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
+
   // Don't perform this optimization when optimizing for size, since
   // materializing elements and inserting them tends to cause code bloat.
-  if (DominantValue && !DAG.shouldOptForSize()) {
+  if (!DAG.shouldOptForSize() &&
+      ((MostCommonCount > DominantValueCountThreshold) ||
+       (ValueCounts.size() <= Log2_32(NumDefElts)))) {
+    // Start by splatting the most common element.
     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
-    if (ValueCounts.size() != 1) {
-      MVT XLenVT = Subtarget.getXLenVT();
-      for (unsigned I = 0; I < NumElts; ++I) {
-        if (!Op.getOperand(I).isUndef() && Op.getOperand(I) != DominantValue)
-          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec,
-                            Op.getOperand(I), DAG.getConstant(I, DL, XLenVT));
+
+    DenseSet<SDValue> Processed{DominantValue};
+    MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
+    for (const auto &OpIdx : enumerate(Op->ops())) {
+      const SDValue &V = OpIdx.value();
+      if (V.isUndef() || !Processed.insert(V).second)
+        continue;
+      if (ValueCounts[V] == 1) {
+        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
+                          DAG.getConstant(OpIdx.index(), DL, XLenVT));
+      } else {
+        // Blend in all instances of this value using a VSELECT, using a
+        // mask where each bit signals whether that element is the one
+        // we're after.
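+        // For example, with DominantValue A and V = B, the BUILD_VECTOR
+        // <B, A, B, A> produces the select mask <1, 0, 1, 0>.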
+        SmallVector<SDValue> Ops;
+        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
+          return DAG.getConstant(V == V1, DL, XLenVT);
+        });
+        Vec = DAG.getNode(ISD::VSELECT, DL, VT,
+                          DAG.getBuildVector(SelMaskTy, DL, Ops),
+                          DAG.getSplatBuildVector(VT, DL, V), Vec);
       }
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -368,138 +368,127 @@
 ; LMULMAX2-RV32-LABEL: bitreverse_v2i64:
 ; LMULMAX2-RV32: # %bb.0:
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle64.v v25, (a0)
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_0)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_0)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v26, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v26
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_1)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_1)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vv v29, v25, v28
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_2)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_2)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vand.vv v29, v29, v30
-; LMULMAX2-RV32-NEXT: vor.vv v27, v29, v27
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_3)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_3)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v29, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vv v30, v25, v29
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_4)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_4)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v31, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v31
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_5)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_5)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v31, (a1)
+; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
+; LMULMAX2-RV32-NEXT: addi a1, zero, 5
+; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.i v25, 0
+; LMULMAX2-RV32-NEXT: addi a1, zero, 24
+; LMULMAX2-RV32-NEXT: vmerge.vxm v27, v25, a1, v0
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vv v8, v25, v31
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_6)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_6)
+; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v27
+; LMULMAX2-RV32-NEXT: lui a1, 4080
 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v9, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30
-; LMULMAX2-RV32-NEXT: vor.vv v27, v30, v27
-; LMULMAX2-RV32-NEXT: vsll.vv v30, v25, v31
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_7)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_7)
+; LMULMAX2-RV32-NEXT: vmerge.vxm v29, v25, a1, v0
+; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v29
;
LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v31, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v29, v25, 8, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v29 +; LMULMAX2-RV32-NEXT: lui a2, 1044480 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v31, v25, a2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v31 -; LMULMAX2-RV32-NEXT: vsll.vv v29, v25, v29 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_8) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_8) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v31, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: addi a2, zero, 40 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v25, a2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v31, v26, v30 +; LMULMAX2-RV32-NEXT: lui a2, 16 +; LMULMAX2-RV32-NEXT: addi a2, a2, -256 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v25, a2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v31, v31, v8 +; LMULMAX2-RV32-NEXT: addi a3, zero, 56 +; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v25, a3, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v9, v26, v8 +; LMULMAX2-RV32-NEXT: vor.vv v31, v31, v9 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v31 +; LMULMAX2-RV32-NEXT: vsll.vv v29, v26, v29 +; LMULMAX2-RV32-NEXT: addi a3, zero, 255 +; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v31, a3 +; LMULMAX2-RV32-NEXT: vmerge.vim v31, v31, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vand.vv v29, v29, v31 -; LMULMAX2-RV32-NEXT: vor.vv v29, v29, v30 -; LMULMAX2-RV32-NEXT: vsll.vv v26, v25, v26 -; LMULMAX2-RV32-NEXT: vsll.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_9) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_9) +; LMULMAX2-RV32-NEXT: vsll.vv v27, v26, v27 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v31, a2 +; LMULMAX2-RV32-NEXT: vmerge.vim v31, v31, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v31 +; LMULMAX2-RV32-NEXT: vor.vv v27, v27, v29 +; LMULMAX2-RV32-NEXT: vsll.vv v29, v26, v30 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vor.vv v25, v26, v25 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v29 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vand.vv v29, v29, v30 +; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v8 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v29 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 -; 
LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_10) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_10) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v25, 4, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v28 ; LMULMAX2-RV32-NEXT: lui a1, 986895 ; LMULMAX2-RV32-NEXT: addi a1, a1, 240 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v29, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v29 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_11) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_11) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v25, 2, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v28 ; LMULMAX2-RV32-NEXT: lui a1, 838861 ; LMULMAX2-RV32-NEXT: addi a1, a1, -820 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v29, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v29 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v27 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v25, v26 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI2_12) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI2_12) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v25, v25, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v27 +; LMULMAX2-RV32-NEXT: vsll.vv v27, v27, v25 ; LMULMAX2-RV32-NEXT: lui a1, 699051 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsrl.vv v25, v26, v25 +; LMULMAX2-RV32-NEXT: vor.vv 
v25, v25, v27 ; LMULMAX2-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -607,138 +596,127 @@ ; LMULMAX1-RV32-LABEL: bitreverse_v2i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_0) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_0) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_1) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_1) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v29, v25, v28 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_2) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v30, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v30 -; LMULMAX1-RV32-NEXT: vor.vv v27, v29, v27 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_3) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_3) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v29, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v30, v25, v29 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_4) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_4) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v30, v30, v31 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_5) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_5) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v26, (a0) +; LMULMAX1-RV32-NEXT: addi a1, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.i v25, 0 +; LMULMAX1-RV32-NEXT: addi a1, zero, 24 +; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v25, a1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v25, v31 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_6) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_6) +; LMULMAX1-RV32-NEXT: vsrl.vv v28, v26, v27 +; LMULMAX1-RV32-NEXT: lui a1, 4080 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vor.vv v30, v8, v30 -; LMULMAX1-RV32-NEXT: vor.vv v27, v30, v27 -; LMULMAX1-RV32-NEXT: vsll.vv v30, v25, v31 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_7) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_7) +; LMULMAX1-RV32-NEXT: vmerge.vxm v29, v25, a1, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v29, v25, 8, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v30, v26, v29 +; LMULMAX1-RV32-NEXT: lui a2, 1044480 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v31, v25, a2, v0 +; 
LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v30, v30, v31 -; LMULMAX1-RV32-NEXT: vsll.vv v29, v25, v29 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_8) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_8) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vor.vv v28, v30, v28 +; LMULMAX1-RV32-NEXT: addi a2, zero, 40 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v30, v25, a2, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v31, v26, v30 +; LMULMAX1-RV32-NEXT: lui a2, 16 +; LMULMAX1-RV32-NEXT: addi a2, a2, -256 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v25, a2, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v8 +; LMULMAX1-RV32-NEXT: addi a3, zero, 56 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v25, a3, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v9, v26, v8 +; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v9 +; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v31 +; LMULMAX1-RV32-NEXT: vsll.vv v29, v26, v29 +; LMULMAX1-RV32-NEXT: addi a3, zero, 255 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v31, a3 +; LMULMAX1-RV32-NEXT: vmerge.vim v31, v31, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v31 -; LMULMAX1-RV32-NEXT: vor.vv v29, v29, v30 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: vsll.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_9) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_9) +; LMULMAX1-RV32-NEXT: vsll.vv v27, v26, v27 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v31, a2 +; LMULMAX1-RV32-NEXT: vmerge.vim v31, v31, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v31 +; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v29 +; LMULMAX1-RV32-NEXT: vsll.vv v29, v26, v30 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX1-RV32-NEXT: vmerge.vim v30, v30, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vor.vv v25, v26, v25 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v29 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v30 +; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v8 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v29 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v28 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a1, a1, -241 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_10) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_10) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v25, 4, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v27 +; 
LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v28 ; LMULMAX1-RV32-NEXT: lui a1, 986895 ; LMULMAX1-RV32-NEXT: addi a1, a1, 240 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v29, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v29 +; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_11) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_11) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v25, 2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v28 ; LMULMAX1-RV32-NEXT: lui a1, 838861 ; LMULMAX1-RV32-NEXT: addi a1, a1, -820 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v29, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v29 +; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI2_12) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI2_12) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v27, (a1) +; LMULMAX1-RV32-NEXT: vand.vv v27, v26, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v25, v25, 1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsll.vv v27, v27, v25 ; LMULMAX1-RV32-NEXT: lui a1, 699051 ; LMULMAX1-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v26, v25 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -1310,138 +1288,127 @@ ; LMULMAX2-RV32-LABEL: bitreverse_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_0) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; 
LMULMAX2-RV32-NEXT: vle32.v v28, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_1) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_1) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v30, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v10, v26, v30 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_2) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_2) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v12, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v8 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_3) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_3) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v12, v26, v8 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_4) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_4) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v14, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_5) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_5) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v14, (a1) +; LMULMAX2-RV32-NEXT: vle64.v v28, (a0) +; LMULMAX2-RV32-NEXT: addi a1, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 +; LMULMAX2-RV32-NEXT: addi a1, zero, 24 +; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v26, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v16, v26, v14 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_6) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_6) +; LMULMAX2-RV32-NEXT: vsrl.vv v8, v28, v30 +; LMULMAX2-RV32-NEXT: lui a1, 4080 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v18, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v18 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vv v12, v26, v14 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_7) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_7) +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v26, a1, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v14, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v26, 8, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v12, v28, v10 +; LMULMAX2-RV32-NEXT: lui a2, 1044480 +; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v26, a2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 -; LMULMAX2-RV32-NEXT: vsll.vv v8, v26, v8 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_8) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_8) +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v8 +; LMULMAX2-RV32-NEXT: addi a2, zero, 40 +; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v26, a2, v0 +; 
LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v14, v28, v8 +; LMULMAX2-RV32-NEXT: lui a2, 16 +; LMULMAX2-RV32-NEXT: addi a2, a2, -256 +; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v26, a2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: addi a3, zero, 56 +; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v26, a3, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v18, v28, v16 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v18 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsll.vv v10, v28, v10 +; LMULMAX2-RV32-NEXT: addi a3, zero, 255 +; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v14, a3 +; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v14 +; LMULMAX2-RV32-NEXT: vsll.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v14, a2 +; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v14 +; LMULMAX2-RV32-NEXT: vor.vv v30, v30, v10 +; LMULMAX2-RV32-NEXT: vsll.vv v8, v28, v8 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v14, (a1) +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vsll.vv v28, v26, v28 -; LMULMAX2-RV32-NEXT: vsll.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_9) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_9) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v30, (a1) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vor.vv v26, v28, v26 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v10 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vsll.vv v28, v28, v16 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v8 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v12 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_10) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_10) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v30, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v26, 4, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v8 ; LMULMAX2-RV32-NEXT: lui a1, 986895 ; LMULMAX2-RV32-NEXT: addi a1, a1, 240 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v30 
-; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v10 +; LMULMAX2-RV32-NEXT: vsrl.vv v28, v28, v8 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_11) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_11) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v30, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v26, 2, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v8 ; LMULMAX2-RV32-NEXT: lui a1, 838861 ; LMULMAX2-RV32-NEXT: addi a1, a1, -820 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v10 +; LMULMAX2-RV32-NEXT: vsrl.vv v28, v28, v8 +; LMULMAX2-RV32-NEXT: vor.vv v28, v28, v30 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI5_12) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI5_12) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v30, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v26, v26, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsll.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsll.vv v30, v30, v26 ; LMULMAX2-RV32-NEXT: lui a1, 699051 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1366 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v8 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v28, v26 +; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v30 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -1551,176 +1518,165 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_0) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v28, v11, v26 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_1) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_1) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v27, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: 
vsrl.vv v30, v11, v27 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_2) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_2) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v29, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v30, v30, v29 -; LMULMAX1-RV32-NEXT: vor.vv v9, v30, v28 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_3) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_3) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v28, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v31, v11, v28 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_4) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_4) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v30, (a2) +; LMULMAX1-RV32-NEXT: vle64.v v13, (a1) +; LMULMAX1-RV32-NEXT: addi a2, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.i v30, 0 +; LMULMAX1-RV32-NEXT: addi a2, zero, 24 +; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v30, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v10, v31, v30 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_5) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_5) +; LMULMAX1-RV32-NEXT: vsrl.vv v27, v13, v26 +; LMULMAX1-RV32-NEXT: lui a2, 4080 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v12, v11, v31 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_6) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_6) +; LMULMAX1-RV32-NEXT: vmerge.vxm v28, v30, a2, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v29, v27, v28 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v8, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v8 -; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX1-RV32-NEXT: vor.vv v12, v10, v9 -; LMULMAX1-RV32-NEXT: vsll.vv v10, v11, v31 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_7) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_7) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v13, v10, v9 -; LMULMAX1-RV32-NEXT: vsll.vv v14, v11, v28 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_8) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_8) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v14, v14, v10 -; LMULMAX1-RV32-NEXT: vor.vv v13, v14, v13 -; LMULMAX1-RV32-NEXT: vsll.vv v14, v11, v26 -; LMULMAX1-RV32-NEXT: vsll.vv v15, v11, v27 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_9) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_9) +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v30, 8, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v8, v13, v27 +; LMULMAX1-RV32-NEXT: lui a3, 1044480 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v31, v30, a3, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v31 +; LMULMAX1-RV32-NEXT: vor.vv v10, v8, v29 +; LMULMAX1-RV32-NEXT: addi a3, zero, 40 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, 
e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v29, v30, a3, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v8, v13, v29 +; LMULMAX1-RV32-NEXT: lui a3, 16 +; LMULMAX1-RV32-NEXT: addi a3, a3, -256 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v30, a3, v0 +; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v11, v8, v9 +; LMULMAX1-RV32-NEXT: addi a4, zero, 56 +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v30, a4, v0 +; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v12, v13, v8 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vor.vv v14, v10, v11 +; LMULMAX1-RV32-NEXT: vsll.vv v11, v13, v27 +; LMULMAX1-RV32-NEXT: addi a4, zero, 255 +; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a4 +; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v12, v11, v10 +; LMULMAX1-RV32-NEXT: vsll.vv v15, v13, v26 +; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a3 +; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 +; LMULMAX1-RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v11 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v12 +; LMULMAX1-RV32-NEXT: vsll.vv v16, v13, v29 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v11, (a2) +; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 +; LMULMAX1-RV32-NEXT: vmerge.vim v12, v12, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v11 -; LMULMAX1-RV32-NEXT: vor.vv v14, v14, v15 -; LMULMAX1-RV32-NEXT: vor.vv v13, v14, v13 -; LMULMAX1-RV32-NEXT: vor.vv v15, v13, v12 +; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v12 +; LMULMAX1-RV32-NEXT: vsll.vv v13, v13, v8 +; LMULMAX1-RV32-NEXT: vor.vv v13, v13, v16 +; LMULMAX1-RV32-NEXT: vor.vv v13, v13, v15 +; LMULMAX1-RV32-NEXT: vor.vv v16, v13, v14 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v14, v15, v12 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_10) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_10) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v13, (a2) +; LMULMAX1-RV32-NEXT: vand.vv v15, v16, v13 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v14, v30, 4, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v16, v14, v13 +; LMULMAX1-RV32-NEXT: vsll.vv v17, v15, v14 ; LMULMAX1-RV32-NEXT: lui a2, 986895 ; LMULMAX1-RV32-NEXT: addi a2, a2, 240 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v15, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v14 -; LMULMAX1-RV32-NEXT: vsrl.vv v15, v15, v13 -; LMULMAX1-RV32-NEXT: vor.vv v17, v15, v16 +; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v15 +; LMULMAX1-RV32-NEXT: vsrl.vv v16, v16, v14 +; LMULMAX1-RV32-NEXT: vor.vv v18, v16, v17 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v15, a2 
+; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v18, v17, v15 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_11) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_11) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v16, (a2) +; LMULMAX1-RV32-NEXT: vand.vv v19, v18, v16 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v17, v30, 2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v18, v18, v16 +; LMULMAX1-RV32-NEXT: vsll.vv v19, v19, v17 ; LMULMAX1-RV32-NEXT: lui a2, 838861 ; LMULMAX1-RV32-NEXT: addi a2, a2, -820 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v19, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v20, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v17, v17, v19 -; LMULMAX1-RV32-NEXT: vsrl.vv v17, v17, v16 -; LMULMAX1-RV32-NEXT: vor.vv v17, v17, v18 +; LMULMAX1-RV32-NEXT: vand.vv v18, v18, v20 +; LMULMAX1-RV32-NEXT: vsrl.vv v18, v18, v17 +; LMULMAX1-RV32-NEXT: vor.vv v18, v18, v19 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v18, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v19, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v20, v17, v18 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI5_12) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI5_12) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v21, (a2) +; LMULMAX1-RV32-NEXT: vand.vv v21, v18, v19 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v30, v30, 1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsll.vv v20, v20, v21 +; LMULMAX1-RV32-NEXT: vsll.vv v21, v21, v30 ; LMULMAX1-RV32-NEXT: lui a2, 699051 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1366 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.v.x v22, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v17, v17, v22 -; LMULMAX1-RV32-NEXT: vsrl.vv v17, v17, v21 -; LMULMAX1-RV32-NEXT: vor.vv v17, v17, v20 -; LMULMAX1-RV32-NEXT: vsrl.vv v20, v25, v26 -; LMULMAX1-RV32-NEXT: vsrl.vv v23, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v29, v23, v29 -; LMULMAX1-RV32-NEXT: vor.vv v29, v29, v20 -; LMULMAX1-RV32-NEXT: vsrl.vv v20, v25, v28 -; LMULMAX1-RV32-NEXT: vand.vv v30, v20, v30 -; LMULMAX1-RV32-NEXT: vsrl.vv v20, v25, v31 -; LMULMAX1-RV32-NEXT: vand.vv v8, v20, v8 -; LMULMAX1-RV32-NEXT: vor.vv v30, v8, v30 -; LMULMAX1-RV32-NEXT: vor.vv v29, v30, v29 -; LMULMAX1-RV32-NEXT: vsll.vv v30, v25, v31 -; LMULMAX1-RV32-NEXT: vand.vv v30, v30, v9 -; LMULMAX1-RV32-NEXT: vsll.vv v28, v25, v28 -; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v10 -; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v30 +; LMULMAX1-RV32-NEXT: vand.vv v18, v18, v22 +; LMULMAX1-RV32-NEXT: vsrl.vv v18, v18, v30 +; LMULMAX1-RV32-NEXT: vor.vv v18, v18, v21 +; LMULMAX1-RV32-NEXT: vsrl.vv v21, v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v28, v21, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v21, v25, v27 +; LMULMAX1-RV32-NEXT: vand.vv v31, v21, v31 +; LMULMAX1-RV32-NEXT: vor.vv v28, v31, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v31, v25, v29 +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v9 +; LMULMAX1-RV32-NEXT: vsrl.vv v9, v25, v8 +; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v9 +; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v31 +; LMULMAX1-RV32-NEXT: vsll.vv v27, v25, 
v27 +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v10 ; LMULMAX1-RV32-NEXT: vsll.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: vsll.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v11 -; LMULMAX1-RV32-NEXT: vor.vv v25, v26, v25 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v11 +; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsll.vv v27, v25, v29 +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v12 +; LMULMAX1-RV32-NEXT: vsll.vv v25, v25, v8 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v29 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v12 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v13 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v14 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v13 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v13 +; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v14 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v15 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v14 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v15 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v16 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v19 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v16 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v16 +; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v17 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v20 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v17 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v18 -; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v21 +; LMULMAX1-RV32-NEXT: vand.vv v26, v25, v19 +; LMULMAX1-RV32-NEXT: vsll.vv v26, v26, v30 ; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v22 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v21 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v30 ; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v17, (a1) +; LMULMAX1-RV32-NEXT: vse64.v v18, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -264,56 +264,53 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI3_0) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX2-RV32-NEXT: addi a1, zero, 5 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 +; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v25, v26 +; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v27 -; LMULMAX2-RV32-NEXT: vsub.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v28 +; LMULMAX2-RV32-NEXT: vsub.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; 
LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v27, v25, v26 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI3_1) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI3_1) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v28 -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v26 -; LMULMAX2-RV32-NEXT: vadd.vv v25, v27, v25 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI3_2) -; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v27, v27, v28 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v28 +; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v27, v26, 4, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v25, v26 -; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX2-RV32-NEXT: vadd.vv v25, v25, v27 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v26 +; LMULMAX2-RV32-NEXT: vand.vv v25, v25, v27 ; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vmul.vv v25, v25, v26 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI3_3) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI3_3) +; LMULMAX2-RV32-NEXT: vmul.vv v25, v25, v27 +; LMULMAX2-RV32-NEXT: addi a1, zero, 56 ; LMULMAX2-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vv v25, v25, v26 ; LMULMAX2-RV32-NEXT: vse64.v v25, (a0) @@ -373,56 +370,53 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI3_0) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX1-RV32-NEXT: addi a1, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.i v26, 0 +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26 +; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v27 -; LMULMAX1-RV32-NEXT: vsub.vv v25, 
v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 2, v0 +; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v28, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v27, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI3_1) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI3_1) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) -; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v27, v25 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI3_2) -; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v28 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v26, 4, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v25, v26 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a1, a1, -241 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v26 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: lui a1, 4112 ; LMULMAX1-RV32-NEXT: addi a1, a1, 257 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v26, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI3_3) -; LMULMAX1-RV32-NEXT: addi a1, a1, %lo(.LCPI3_3) +; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: addi a1, zero, 56 ; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v26 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) @@ -837,59 +831,56 @@ ; LMULMAX2-RV32-LABEL: ctpop_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI7_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI7_0) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: vle64.v v28, (a0) +; LMULMAX2-RV32-NEXT: addi a1, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v26, 0 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 ; 
LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v30 -; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vsub.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 2, v0 +; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v30, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI7_1) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI7_1) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v8, (a1) +; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v8 -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v30, v26 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI7_2) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI7_2) -; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: vand.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v8 +; LMULMAX2-RV32-NEXT: vadd.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v26, 4, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v28 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v28, v30 +; LMULMAX2-RV32-NEXT: vadd.vv v28, v28, v30 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vand.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v30 ; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vmul.vv v26, v26, v28 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI7_3) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI7_3) +; LMULMAX2-RV32-NEXT: vmul.vv v28, v28, v30 +; LMULMAX2-RV32-NEXT: addi a1, zero, 56 ; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v28, v26 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -949,70 +940,67 @@ ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v26, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI7_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI7_0) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v27, (a2) +; LMULMAX1-RV32-NEXT: addi a2, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a3, 1, 
e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.i v27, 0 +; LMULMAX1-RV32-NEXT: vmerge.vim v28, v27, 1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v28, v26, v27 +; LMULMAX1-RV32-NEXT: vsrl.vv v29, v26, v28 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v29, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2 +; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v30 +; LMULMAX1-RV32-NEXT: vsub.vv v26, v26, v29 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v29, v27, 2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29 -; LMULMAX1-RV32-NEXT: vsub.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v31, v26, v29 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v28, a2 -; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v30, v26, v28 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI7_1) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI7_1) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v31, (a2) +; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v31 -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v28 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v30, v26 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI7_2) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI7_2) -; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v30, (a2) +; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v8 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v8 +; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v31 +; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmerge.vim v31, v27, 4, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v26, v30 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v8 +; LMULMAX1-RV32-NEXT: vsrl.vv v9, v26, v31 +; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v9 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v8 +; LMULMAX1-RV32-NEXT: vand.vv v26, v26, v9 ; LMULMAX1-RV32-NEXT: lui a2, 4112 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vmul.vv v26, v26, v9 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI7_3) +; LMULMAX1-RV32-NEXT: vmul.vv v26, v26, v10 +; LMULMAX1-RV32-NEXT: addi a2, zero, 56 ; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v27, v27, v29 -; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v27 -; LMULMAX1-RV32-NEXT: vand.vv v27, v25, v28 -; LMULMAX1-RV32-NEXT: 
vsrl.vv v25, v25, v31 -; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v27, v25 -; LMULMAX1-RV32-NEXT: vsrl.vv v27, v25, v30 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v28 +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v30 +; LMULMAX1-RV32-NEXT: vsub.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v29 +; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v8 ; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v8 -; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v9 -; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v10 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vsrl.vv v28, v25, v31 +; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vand.vv v25, v25, v9 +; LMULMAX1-RV32-NEXT: vmul.vv v25, v25, v10 +; LMULMAX1-RV32-NEXT: vsrl.vv v25, v25, v27 ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v26, (a1) ; LMULMAX1-RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -78,3 +78,24 @@ store <4 x float> %v3, <4 x float>* %x ret void } + +define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) { +; CHECK-LABEL: buildvec_merge0_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 6 +; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; CHECK-NEXT: lui a2, %hi(.LCPI4_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI4_0)(a2) +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, fa0 +; CHECK-NEXT: vfmerge.vfm v25, v25, ft0, v0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %v0 = insertelement <4 x float> undef, float %f, i32 0 + %v1 = insertelement <4 x float> %v0, float 2.0, i32 1 + %v2 = insertelement <4 x float> %v1, float 2.0, i32 2 + %v3 = insertelement <4 x float> %v2, float %f, i32 3 + store <4 x float> %v3, <4 x float>* %x + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -48,44 +48,35 @@ define void @splat_v2i64(<2 x i64>* %x, i64 %y) { ; LMULMAX8-RV32-LABEL: splat_v2i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -16 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: sw a2, 4(sp) -; LMULMAX8-RV32-NEXT: sw a1, 0(sp) -; LMULMAX8-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX8-RV32-NEXT: vle32.v v25, (sp) +; LMULMAX8-RV32-NEXT: addi a3, zero, 5 +; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX8-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX8-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX8-RV32-NEXT: vmv.v.x v25, a2 +; LMULMAX8-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 ; LMULMAX8-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX8-RV32-NEXT: addi sp, sp, 16 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: splat_v2i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -16 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-RV32-NEXT: sw a2, 12(sp) -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: sw a2, 4(sp) -; LMULMAX2-RV32-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v25, (sp) +; LMULMAX2-RV32-NEXT: 
addi a3, zero, 5 +; LMULMAX2-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX2-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v25, a2 +; LMULMAX2-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 ; LMULMAX2-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 16 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_v2i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-RV32-NEXT: sw a2, 12(sp) -; LMULMAX1-RV32-NEXT: sw a1, 8(sp) -; LMULMAX1-RV32-NEXT: sw a2, 4(sp) -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v25, (sp) +; LMULMAX1-RV32-NEXT: addi a3, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v25, a2 +; LMULMAX1-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi sp, sp, 16 ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX8-RV64-LABEL: splat_v2i64: @@ -206,74 +197,37 @@ define void @splat_v4i64(<4 x i64>* %x, i64 %y) { ; LMULMAX8-RV32-LABEL: splat_v4i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -64 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 64 -; LMULMAX8-RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; LMULMAX8-RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; LMULMAX8-RV32-NEXT: .cfi_offset ra, -4 -; LMULMAX8-RV32-NEXT: .cfi_offset s0, -8 -; LMULMAX8-RV32-NEXT: addi s0, sp, 64 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX8-RV32-NEXT: andi sp, sp, -32 -; LMULMAX8-RV32-NEXT: sw a2, 28(sp) -; LMULMAX8-RV32-NEXT: sw a1, 24(sp) -; LMULMAX8-RV32-NEXT: sw a2, 20(sp) -; LMULMAX8-RV32-NEXT: sw a1, 16(sp) -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: sw a2, 4(sp) -; LMULMAX8-RV32-NEXT: sw a1, 0(sp) -; LMULMAX8-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX8-RV32-NEXT: vle32.v v26, (sp) +; LMULMAX8-RV32-NEXT: addi a3, zero, 85 +; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX8-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX8-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX8-RV32-NEXT: vmv.v.x v26, a2 +; LMULMAX8-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; LMULMAX8-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX8-RV32-NEXT: addi sp, s0, -64 -; LMULMAX8-RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; LMULMAX8-RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; LMULMAX8-RV32-NEXT: addi sp, sp, 64 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: splat_v4i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -64 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 64 -; LMULMAX2-RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; LMULMAX2-RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; LMULMAX2-RV32-NEXT: .cfi_offset ra, -4 -; LMULMAX2-RV32-NEXT: .cfi_offset s0, -8 -; LMULMAX2-RV32-NEXT: addi s0, sp, 64 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-RV32-NEXT: andi sp, sp, -32 -; LMULMAX2-RV32-NEXT: sw a2, 28(sp) -; LMULMAX2-RV32-NEXT: sw a1, 24(sp) -; LMULMAX2-RV32-NEXT: sw a2, 20(sp) -; LMULMAX2-RV32-NEXT: sw a1, 16(sp) -; LMULMAX2-RV32-NEXT: sw a2, 12(sp) -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: sw a2, 4(sp) -; LMULMAX2-RV32-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v26, (sp) +; LMULMAX2-RV32-NEXT: addi a3, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli 
a4, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v26, a2 +; LMULMAX2-RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; LMULMAX2-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX2-RV32-NEXT: addi sp, s0, -64 -; LMULMAX2-RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; LMULMAX2-RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; LMULMAX2-RV32-NEXT: addi sp, sp, 64 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX1-RV32-LABEL: splat_v4i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-RV32-NEXT: sw a2, 12(sp) -; LMULMAX1-RV32-NEXT: sw a1, 8(sp) -; LMULMAX1-RV32-NEXT: sw a2, 4(sp) -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v25, (sp) +; LMULMAX1-RV32-NEXT: addi a3, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v25, a2 +; LMULMAX1-RV32-NEXT: vmerge.vxm v25, v25, a1, v0 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vse32.v v25, (a1) ; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi sp, sp, 16 ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX8-RV64-LABEL: splat_v4i64: @@ -842,72 +796,23 @@ define void @vadd_vx_v16i64(<16 x i64>* %a, i64 %b, <16 x i64>* %c) { ; LMULMAX8-RV32-LABEL: vadd_vx_v16i64: ; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -256 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill -; LMULMAX8-RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill -; LMULMAX8-RV32-NEXT: .cfi_offset ra, -4 -; LMULMAX8-RV32-NEXT: .cfi_offset s0, -8 -; LMULMAX8-RV32-NEXT: addi s0, sp, 256 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX8-RV32-NEXT: andi sp, sp, -128 ; LMULMAX8-RV32-NEXT: vsetivli a4, 16, e64,m8,ta,mu ; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: sw a2, 124(sp) -; LMULMAX8-RV32-NEXT: sw a1, 120(sp) -; LMULMAX8-RV32-NEXT: sw a2, 116(sp) -; LMULMAX8-RV32-NEXT: sw a1, 112(sp) -; LMULMAX8-RV32-NEXT: sw a2, 108(sp) -; LMULMAX8-RV32-NEXT: sw a1, 104(sp) -; LMULMAX8-RV32-NEXT: sw a2, 100(sp) -; LMULMAX8-RV32-NEXT: sw a1, 96(sp) -; LMULMAX8-RV32-NEXT: sw a2, 92(sp) -; LMULMAX8-RV32-NEXT: sw a1, 88(sp) -; LMULMAX8-RV32-NEXT: sw a2, 84(sp) -; LMULMAX8-RV32-NEXT: sw a1, 80(sp) -; LMULMAX8-RV32-NEXT: sw a2, 76(sp) -; LMULMAX8-RV32-NEXT: sw a1, 72(sp) -; LMULMAX8-RV32-NEXT: sw a2, 68(sp) -; LMULMAX8-RV32-NEXT: sw a1, 64(sp) -; LMULMAX8-RV32-NEXT: sw a2, 60(sp) -; LMULMAX8-RV32-NEXT: sw a1, 56(sp) -; LMULMAX8-RV32-NEXT: sw a2, 52(sp) -; LMULMAX8-RV32-NEXT: sw a1, 48(sp) -; LMULMAX8-RV32-NEXT: sw a2, 44(sp) -; LMULMAX8-RV32-NEXT: sw a1, 40(sp) -; LMULMAX8-RV32-NEXT: sw a2, 36(sp) -; LMULMAX8-RV32-NEXT: sw a1, 32(sp) -; LMULMAX8-RV32-NEXT: sw a2, 28(sp) -; LMULMAX8-RV32-NEXT: sw a1, 24(sp) -; LMULMAX8-RV32-NEXT: sw a2, 20(sp) -; LMULMAX8-RV32-NEXT: sw a1, 16(sp) -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: sw a2, 4(sp) -; LMULMAX8-RV32-NEXT: sw a1, 0(sp) +; LMULMAX8-RV32-NEXT: lui a0, 349525 +; LMULMAX8-RV32-NEXT: addi a0, a0, 1365 +; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu +; LMULMAX8-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX8-RV32-NEXT: addi a0, zero, 32 ; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu -; LMULMAX8-RV32-NEXT: vle32.v v16, (sp) +; LMULMAX8-RV32-NEXT: vmv.v.x v16, a2 +; 
LMULMAX8-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 ; LMULMAX8-RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a3) -; LMULMAX8-RV32-NEXT: addi sp, s0, -256 -; LMULMAX8-RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload -; LMULMAX8-RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload -; LMULMAX8-RV32-NEXT: addi sp, sp, 256 ; LMULMAX8-RV32-NEXT: ret ; ; LMULMAX2-RV32-LABEL: vadd_vx_v16i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -64 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 64 -; LMULMAX2-RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; LMULMAX2-RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; LMULMAX2-RV32-NEXT: .cfi_offset ra, -4 -; LMULMAX2-RV32-NEXT: .cfi_offset s0, -8 -; LMULMAX2-RV32-NEXT: addi s0, sp, 64 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-RV32-NEXT: andi sp, sp, -32 ; LMULMAX2-RV32-NEXT: addi a4, a0, 64 ; LMULMAX2-RV32-NEXT: vsetivli a5, 4, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vle64.v v26, (a4) @@ -916,16 +821,12 @@ ; LMULMAX2-RV32-NEXT: vle64.v v30, (a0) ; LMULMAX2-RV32-NEXT: addi a0, a0, 32 ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: sw a2, 28(sp) -; LMULMAX2-RV32-NEXT: sw a1, 24(sp) -; LMULMAX2-RV32-NEXT: sw a2, 20(sp) -; LMULMAX2-RV32-NEXT: sw a1, 16(sp) -; LMULMAX2-RV32-NEXT: sw a2, 12(sp) -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: sw a2, 4(sp) -; LMULMAX2-RV32-NEXT: sw a1, 0(sp) +; LMULMAX2-RV32-NEXT: addi a0, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX2-RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu -; LMULMAX2-RV32-NEXT: vle32.v v10, (sp) +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vadd.vv v30, v30, v10 @@ -938,16 +839,10 @@ ; LMULMAX2-RV32-NEXT: vse64.v v30, (a3) ; LMULMAX2-RV32-NEXT: addi a0, a3, 32 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, s0, -64 -; LMULMAX2-RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; LMULMAX2-RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; LMULMAX2-RV32-NEXT: addi sp, sp, 64 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX1-RV32-LABEL: vadd_vx_v16i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 ; LMULMAX1-RV32-NEXT: addi a4, a0, 96 ; LMULMAX1-RV32-NEXT: vsetivli a5, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a4) @@ -964,12 +859,12 @@ ; LMULMAX1-RV32-NEXT: vle64.v v31, (a0) ; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: sw a2, 12(sp) -; LMULMAX1-RV32-NEXT: sw a1, 8(sp) -; LMULMAX1-RV32-NEXT: sw a2, 4(sp) -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) +; LMULMAX1-RV32-NEXT: addi a0, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a4, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX1-RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu -; LMULMAX1-RV32-NEXT: vle32.v v9, (sp) +; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; LMULMAX1-RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 ; LMULMAX1-RV32-NEXT: vadd.vv v31, v31, v9 @@ -994,7 +889,6 @@ ; LMULMAX1-RV32-NEXT: vse64.v v31, (a3) ; LMULMAX1-RV32-NEXT: addi a0, a3, 16 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi sp, sp, 16 ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX8-RV64-LABEL: vadd_vx_v16i64: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -837,30 +837,99 @@ } define void @mulhu_v16i8(<16 x i8>* %x) { -; CHECK-LABEL: mulhu_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu -; CHECK-NEXT: vle8.v v25, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI52_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_0) -; CHECK-NEXT: vle8.v v26, (a1) -; CHECK-NEXT: lui a1, %hi(.LCPI52_1) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_1) -; CHECK-NEXT: vle8.v v27, (a1) -; CHECK-NEXT: vsrl.vv v26, v25, v26 -; CHECK-NEXT: vmulhu.vv v26, v26, v27 -; CHECK-NEXT: lui a1, %hi(.LCPI52_2) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_2) -; CHECK-NEXT: vle8.v v27, (a1) -; CHECK-NEXT: lui a1, %hi(.LCPI52_3) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI52_3) -; CHECK-NEXT: vle8.v v28, (a1) -; CHECK-NEXT: vsub.vv v25, v25, v26 -; CHECK-NEXT: vmulhu.vv v25, v25, v27 -; CHECK-NEXT: vadd.vv v25, v25, v26 -; CHECK-NEXT: vsrl.vv v25, v25, v28 -; CHECK-NEXT: vse8.v v25, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: mulhu_v16i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV32-NEXT: vle8.v v25, (a0) +; RV32-NEXT: addi a1, zero, 513 +; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, 4 +; RV32-NEXT: vmerge.vim v26, v26, 1, v0 +; RV32-NEXT: lui a1, 1 +; RV32-NEXT: addi a2, a1, 78 +; RV32-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV32-NEXT: vmerge.vim v26, v26, 3, v0 +; RV32-NEXT: lui a2, 8 +; RV32-NEXT: addi a2, a2, 304 +; RV32-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV32-NEXT: vmerge.vim v26, v26, 2, v0 +; RV32-NEXT: lui a2, 3 +; RV32-NEXT: addi a2, a2, -2044 +; RV32-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV32-NEXT: vmv.v.i v27, 0 +; RV32-NEXT: addi a2, zero, -128 +; RV32-NEXT: vmerge.vxm v28, v27, a2, v0 +; RV32-NEXT: addi a1, a1, 32 +; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV32-NEXT: lui a1, %hi(.LCPI52_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI52_0) +; RV32-NEXT: vle8.v v29, (a1) +; RV32-NEXT: vmerge.vim v27, v27, 1, v0 +; RV32-NEXT: vsrl.vv v27, v25, v27 +; RV32-NEXT: vmulhu.vv v27, v27, v29 +; RV32-NEXT: vsub.vv v25, v25, v27 +; RV32-NEXT: vmulhu.vv v25, v25, v28 +; RV32-NEXT: vadd.vv v25, v25, v27 +; RV32-NEXT: vsrl.vv v25, v25, v26 +; RV32-NEXT: vse8.v v25, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: mulhu_v16i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV64-NEXT: vle8.v v25, (a0) +; RV64-NEXT: addi a1, zero, 513 +; RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, 4 +; RV64-NEXT: vmerge.vim v26, v26, 1, v0 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: addiw a2, a1, 78 +; RV64-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV64-NEXT: vmerge.vim v26, v26, 3, v0 +; RV64-NEXT: lui a2, 8 +; RV64-NEXT: addiw a2, a2, 304 +; RV64-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV64-NEXT: vmerge.vim v26, v26, 2, v0 +; RV64-NEXT: lui a2, 3 +; RV64-NEXT: addiw a2, a2, -2044 
+; RV64-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; RV64-NEXT: vmv.v.i v27, 0 +; RV64-NEXT: addi a2, zero, -128 +; RV64-NEXT: vmerge.vxm v28, v27, a2, v0 +; RV64-NEXT: addiw a1, a1, 32 +; RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV64-NEXT: lui a1, %hi(.LCPI52_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI52_0) +; RV64-NEXT: vle8.v v29, (a1) +; RV64-NEXT: vmerge.vim v27, v27, 1, v0 +; RV64-NEXT: vsrl.vv v27, v25, v27 +; RV64-NEXT: vmulhu.vv v27, v27, v29 +; RV64-NEXT: vsub.vv v25, v25, v27 +; RV64-NEXT: vmulhu.vv v25, v25, v28 +; RV64-NEXT: vadd.vv v25, v25, v27 +; RV64-NEXT: vsrl.vv v25, v25, v26 +; RV64-NEXT: vse8.v v25, (a0) +; RV64-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = udiv <16 x i8> %a, store <16 x i8> %b, <16 x i8>* %x @@ -872,27 +941,33 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v25, (a0) -; CHECK-NEXT: vmv.v.i v26, 0 -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vmv1r.v v27, v26 -; CHECK-NEXT: vmv.s.x v27, a1 ; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: vmv.s.x v28, a1 +; CHECK-NEXT: vmv.s.x v26, a1 +; CHECK-NEXT: addi a1, zero, 33 +; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v27, 3 +; CHECK-NEXT: vmerge.vim v27, v27, 2, v0 +; CHECK-NEXT: vsetivli a1, 7, e16,m1,tu,mu +; CHECK-NEXT: vslideup.vi v27, v26, 6 +; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v28, 0 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vmv1r.v v29, v28 +; CHECK-NEXT: vmv.s.x v29, a1 ; CHECK-NEXT: vsetivli a1, 7, e16,m1,tu,mu -; CHECK-NEXT: vslideup.vi v26, v28, 6 +; CHECK-NEXT: vslideup.vi v28, v26, 6 ; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu ; CHECK-NEXT: lui a1, %hi(.LCPI53_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_0) -; CHECK-NEXT: vle16.v v28, (a1) -; CHECK-NEXT: vsrl.vv v26, v25, v26 -; CHECK-NEXT: vmulhu.vv v26, v26, v28 -; CHECK-NEXT: lui a1, %hi(.LCPI53_1) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_1) -; CHECK-NEXT: vle16.v v28, (a1) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vsrl.vv v28, v25, v28 +; CHECK-NEXT: vmulhu.vv v26, v28, v26 ; CHECK-NEXT: vsub.vv v25, v25, v26 -; CHECK-NEXT: vmulhu.vv v25, v25, v27 +; CHECK-NEXT: vmulhu.vv v25, v25, v29 ; CHECK-NEXT: vadd.vv v25, v25, v26 -; CHECK-NEXT: vsrl.vv v25, v25, v28 +; CHECK-NEXT: vsrl.vv v25, v25, v27 ; CHECK-NEXT: vse16.v v25, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x @@ -990,20 +1065,45 @@ } define void @mulhs_v16i8(<16 x i8>* %x) { -; CHECK-LABEL: mulhs_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu -; CHECK-NEXT: vle8.v v25, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI56_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI56_0) -; CHECK-NEXT: vle8.v v26, (a1) -; CHECK-NEXT: lui a1, %hi(.LCPI56_1) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI56_1) -; CHECK-NEXT: vle8.v v27, (a1) -; CHECK-NEXT: vmulhu.vv v25, v25, v26 -; CHECK-NEXT: vsrl.vv v25, v25, v27 -; CHECK-NEXT: vse8.v v25, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: mulhs_v16i8: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV32-NEXT: vle8.v v25, (a0) +; RV32-NEXT: lui a1, 5 +; RV32-NEXT: addi a1, a1, -1452 +; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV32-NEXT: vmv.v.i v26, 7 +; RV32-NEXT: vmerge.vim v26, v26, 1, v0 +; RV32-NEXT: addi a1, zero, -123 +; RV32-NEXT: vmv.v.x v27, a1 +; RV32-NEXT: 
addi a1, zero, 57 +; RV32-NEXT: vmerge.vxm v27, v27, a1, v0 +; RV32-NEXT: vmulhu.vv v25, v25, v27 +; RV32-NEXT: vsrl.vv v25, v25, v26 +; RV32-NEXT: vse8.v v25, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: mulhs_v16i8: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV64-NEXT: vle8.v v25, (a0) +; RV64-NEXT: lui a1, 5 +; RV64-NEXT: addiw a1, a1, -1452 +; RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; RV64-NEXT: vmv.v.i v26, 7 +; RV64-NEXT: vmerge.vim v26, v26, 1, v0 +; RV64-NEXT: addi a1, zero, -123 +; RV64-NEXT: vmv.v.x v27, a1 +; RV64-NEXT: addi a1, zero, 57 +; RV64-NEXT: vmerge.vxm v27, v27, a1, v0 +; RV64-NEXT: vmulhu.vv v25, v25, v27 +; RV64-NEXT: vsrl.vv v25, v25, v26 +; RV64-NEXT: vse8.v v25, (a0) +; RV64-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = udiv <16 x i8> %a, store <16 x i8> %b, <16 x i8>* %x @@ -1011,19 +1111,47 @@ } define void @mulhs_v8i16(<8 x i16>* %x) { -; CHECK-LABEL: mulhs_v8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli a1, 8, e16,m1,ta,mu -; CHECK-NEXT: vle16.v v25, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI57_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI57_0) -; CHECK-NEXT: vle16.v v26, (a1) -; CHECK-NEXT: vmulh.vv v25, v25, v26 -; CHECK-NEXT: vsra.vi v25, v25, 1 -; CHECK-NEXT: vsrl.vi v26, v25, 15 -; CHECK-NEXT: vadd.vv v25, v25, v26 -; CHECK-NEXT: vse16.v v25, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: mulhs_v8i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; RV32-NEXT: vle16.v v25, (a0) +; RV32-NEXT: addi a1, zero, 105 +; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: lui a1, 5 +; RV32-NEXT: addi a1, a1, -1755 +; RV32-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a1 +; RV32-NEXT: lui a1, 1048571 +; RV32-NEXT: addi a1, a1, 1755 +; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 +; RV32-NEXT: vmulh.vv v25, v25, v26 +; RV32-NEXT: vsra.vi v25, v25, 1 +; RV32-NEXT: vsrl.vi v26, v25, 15 +; RV32-NEXT: vadd.vv v25, v25, v26 +; RV32-NEXT: vse16.v v25, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: mulhs_v8i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; RV64-NEXT: vle16.v v25, (a0) +; RV64-NEXT: addi a1, zero, 105 +; RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: lui a1, 5 +; RV64-NEXT: addiw a1, a1, -1755 +; RV64-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: lui a1, 1048571 +; RV64-NEXT: addiw a1, a1, 1755 +; RV64-NEXT: vmerge.vxm v26, v26, a1, v0 +; RV64-NEXT: vmulh.vv v25, v25, v26 +; RV64-NEXT: vsra.vi v25, v25, 1 +; RV64-NEXT: vsrl.vi v26, v25, 15 +; RV64-NEXT: vadd.vv v25, v25, v26 +; RV64-NEXT: vse16.v v25, (a0) +; RV64-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = sdiv <8 x i16> %a, store <8 x i16> %b, <8 x i16>* %x @@ -1035,9 +1163,16 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; RV32-NEXT: vle32.v v25, (a0) -; RV32-NEXT: lui a1, %hi(.LCPI58_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI58_0) -; RV32-NEXT: vle32.v v26, (a1) +; RV32-NEXT: addi a1, zero, 5 +; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: lui a1, 419430 +; RV32-NEXT: addi a1, a1, 1639 +; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v26, a1 +; RV32-NEXT: lui a1, 629146 +; RV32-NEXT: addi a1, a1, -1639 +; RV32-NEXT: vmerge.vxm v26, v26, a1, v0 ; RV32-NEXT: vmulh.vv v25, v25, v26 ; RV32-NEXT: vsrl.vi v26, v25, 31 ; RV32-NEXT: vsra.vi v25, v25, 1 @@ -1049,9 +1184,16 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli a1, 4, 
e32,m1,ta,mu ; RV64-NEXT: vle32.v v25, (a0) -; RV64-NEXT: lui a1, %hi(.LCPI58_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI58_0) -; RV64-NEXT: vle32.v v26, (a1) +; RV64-NEXT: addi a1, zero, 5 +; RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: lui a1, 419430 +; RV64-NEXT: addiw a1, a1, 1639 +; RV64-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: lui a1, 629146 +; RV64-NEXT: addiw a1, a1, -1639 +; RV64-NEXT: vmerge.vxm v26, v26, a1, v0 ; RV64-NEXT: vmulh.vv v25, v25, v26 ; RV64-NEXT: vsra.vi v25, v25, 1 ; RV64-NEXT: vsrl.vi v26, v25, 31 @@ -1069,36 +1211,40 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; RV32-NEXT: vle64.v v25, (a0) -; RV32-NEXT: lui a1, %hi(.LCPI59_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0) -; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vle32.v v26, (a1) -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vmul.vv v26, v25, v26 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a2, a1, 1365 ; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.v.x v27, a2 +; RV32-NEXT: vmv.v.x v26, a2 ; RV32-NEXT: addi a1, a1, 1366 -; RV32-NEXT: vmv.s.x v27, a1 +; RV32-NEXT: vmv.s.x v26, a1 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vmulh.vv v25, v25, v27 -; RV32-NEXT: vadd.vv v25, v25, v26 -; RV32-NEXT: lui a1, %hi(.LCPI59_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI59_1) -; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vle32.v v26, (a1) -; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsrl.vv v26, v25, v26 +; RV32-NEXT: vmulh.vv v26, v25, v26 ; RV32-NEXT: addi a1, zero, 1 +; RV32-NEXT: addi a2, zero, 3 +; RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v27, -1 +; RV32-NEXT: vmerge.vim v27, v27, 0, v0 +; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; RV32-NEXT: vmul.vv v25, v25, v27 +; RV32-NEXT: vadd.vv v25, v26, v25 +; RV32-NEXT: addi a2, zero, 5 +; RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu +; RV32-NEXT: vmv.s.x v0, a2 ; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu -; RV32-NEXT: vmv.s.x v27, a1 -; RV32-NEXT: vmv.v.i v28, 0 +; RV32-NEXT: vmv.v.i v26, 0 +; RV32-NEXT: addi a2, zero, 63 +; RV32-NEXT: vmerge.vxm v27, v26, a2, v0 +; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu +; RV32-NEXT: vsrl.vv v27, v25, v27 +; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.s.x v28, a1 ; RV32-NEXT: vsetivli a1, 3, e32,m1,tu,mu -; RV32-NEXT: vslideup.vi v28, v27, 2 +; RV32-NEXT: vslideup.vi v26, v28, 2 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu -; RV32-NEXT: vsra.vv v25, v25, v28 -; RV32-NEXT: vadd.vv v25, v25, v26 +; RV32-NEXT: vsra.vv v25, v25, v26 +; RV32-NEXT: vadd.vv v25, v25, v27 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret ; @@ -3848,31 +3994,105 @@ } define void @mulhu_v32i8(<32 x i8>* %x) { -; LMULMAX2-LABEL: mulhu_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a1, zero, 32 -; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu -; LMULMAX2-NEXT: vle8.v v26, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_0) -; LMULMAX2-NEXT: vle8.v v28, (a1) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_1) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_1) -; LMULMAX2-NEXT: vle8.v v30, (a1) -; LMULMAX2-NEXT: vsrl.vv v28, v26, v28 -; LMULMAX2-NEXT: vmulhu.vv v28, v28, v30 -; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_2) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_2) -; LMULMAX2-NEXT: vle8.v v30, (a1) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI129_3) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI129_3) -; 
LMULMAX2-NEXT: vle8.v v8, (a1) -; LMULMAX2-NEXT: vsub.vv v26, v26, v28 -; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30 -; LMULMAX2-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-NEXT: vsrl.vv v26, v26, v8 -; LMULMAX2-NEXT: vse8.v v26, (a0) -; LMULMAX2-NEXT: ret +; LMULMAX2-RV32-LABEL: mulhu_v32i8: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: addi a1, zero, 32 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV32-NEXT: lui a2, 8208 +; LMULMAX2-RV32-NEXT: addi a2, a2, 513 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v28, 4 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV32-NEXT: lui a2, 66785 +; LMULMAX2-RV32-NEXT: addi a2, a2, 78 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 3, v0 +; LMULMAX2-RV32-NEXT: lui a2, 529160 +; LMULMAX2-RV32-NEXT: addi a2, a2, 304 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0 +; LMULMAX2-RV32-NEXT: lui a2, 163907 +; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV32-NEXT: addi a2, zero, -128 +; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v30, a2, v0 +; LMULMAX2-RV32-NEXT: lui a2, 66049 +; LMULMAX2-RV32-NEXT: addi a2, a2, 32 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI129_0) +; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI129_0) +; LMULMAX2-RV32-NEXT: vle8.v v10, (a1) +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30 +; LMULMAX2-RV32-NEXT: vmulhu.vv v30, v30, v10 +; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v8 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vse8.v v26, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX2-RV64-LABEL: mulhu_v32i8: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: addi a1, zero, 32 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV64-NEXT: lui a2, 8208 +; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.i v28, 4 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV64-NEXT: lui a2, 66785 +; LMULMAX2-RV64-NEXT: addiw a2, a2, 78 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 3, v0 +; LMULMAX2-RV64-NEXT: lui a2, 529160 +; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, 
e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV64-NEXT: addi a2, zero, -128 +; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v30, a2, v0 +; LMULMAX2-RV64-NEXT: lui a2, 66049 +; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI129_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI129_0) +; LMULMAX2-RV64-NEXT: vle8.v v10, (a1) +; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v30, v26, v30 +; LMULMAX2-RV64-NEXT: vmulhu.vv v30, v30, v10 +; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vse8.v v26, (a0) +; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhu_v32i8: ; LMULMAX1: # %bb.0: @@ -3895,30 +4115,83 @@ } define void @mulhu_v16i16(<16 x i16>* %x) { -; LMULMAX2-LABEL: mulhu_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli a1, 16, e16,m2,ta,mu -; LMULMAX2-NEXT: vle16.v v26, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_0) -; LMULMAX2-NEXT: vle16.v v28, (a1) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_1) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_1) -; LMULMAX2-NEXT: vle16.v v30, (a1) -; LMULMAX2-NEXT: vsrl.vv v28, v26, v28 -; LMULMAX2-NEXT: vmulhu.vv v28, v28, v30 -; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_2) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_2) -; LMULMAX2-NEXT: vle16.v v30, (a1) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI130_3) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI130_3) -; LMULMAX2-NEXT: vle16.v v8, (a1) -; LMULMAX2-NEXT: vsub.vv v26, v26, v28 -; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30 -; LMULMAX2-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-NEXT: vsrl.vv v26, v26, v8 -; LMULMAX2-NEXT: vse16.v v26, (a0) -; LMULMAX2-NEXT: ret +; LMULMAX2-RV32-LABEL: mulhu_v16i16: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vle16.v v26, (a0) +; LMULMAX2-RV32-NEXT: lui a1, 2 +; LMULMAX2-RV32-NEXT: addi a1, a1, 289 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v28, 3 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0 +; LMULMAX2-RV32-NEXT: lui a1, 4 +; LMULMAX2-RV32-NEXT: addi a1, a1, 64 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV32-NEXT: vmv1r.v v12, v0 +; LMULMAX2-RV32-NEXT: addi a1, zero, 257 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1048568 +; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI130_0) +; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI130_0) +; LMULMAX2-RV32-NEXT: vle16.v v8, (a2) +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v30, a1, v0 +; LMULMAX2-RV32-NEXT: vmv1r.v v0, v12 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30 +; LMULMAX2-RV32-NEXT: vmulhu.vv v30, v30, v8 +; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v10 +; 
LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vse16.v v26, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX2-RV64-LABEL: mulhu_v16i16: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vle16.v v26, (a0) +; LMULMAX2-RV64-NEXT: lui a1, 2 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 +; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.i v28, 3 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0 +; LMULMAX2-RV64-NEXT: lui a1, 4 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 +; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV64-NEXT: vmv1r.v v12, v0 +; LMULMAX2-RV64-NEXT: addi a1, zero, 257 +; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0 +; LMULMAX2-RV64-NEXT: lui a1, 1048568 +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI130_0) +; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI130_0) +; LMULMAX2-RV64-NEXT: vle16.v v8, (a2) +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v30, a1, v0 +; LMULMAX2-RV64-NEXT: vmv1r.v v0, v12 +; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v30, v26, v30 +; LMULMAX2-RV64-NEXT: vmulhu.vv v30, v30, v8 +; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v10 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vse16.v v26, (a0) +; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhu_v16i16: ; LMULMAX1: # %bb.0: @@ -3945,20 +4218,27 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-NEXT: vle32.v v26, (a0) +; LMULMAX2-NEXT: addi a1, zero, 68 +; LMULMAX2-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_0) ; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_0) ; LMULMAX2-NEXT: vle32.v v28, (a1) +; LMULMAX2-NEXT: vmv.v.i v30, 0 +; LMULMAX2-NEXT: lui a1, 524288 +; LMULMAX2-NEXT: vmerge.vxm v30, v30, a1, v0 ; LMULMAX2-NEXT: vmulhu.vv v28, v26, v28 -; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_1) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_1) -; LMULMAX2-NEXT: vle32.v v30, (a1) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI131_2) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI131_2) -; LMULMAX2-NEXT: vle32.v v8, (a1) ; LMULMAX2-NEXT: vsub.vv v26, v26, v28 ; LMULMAX2-NEXT: vmulhu.vv v26, v26, v30 ; LMULMAX2-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-NEXT: vsrl.vv v26, v26, v8 +; LMULMAX2-NEXT: addi a1, zero, 136 +; LMULMAX2-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX2-NEXT: vmv.v.i v28, 2 +; LMULMAX2-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-NEXT: vsrl.vv v26, v26, v28 ; LMULMAX2-NEXT: vse32.v v26, (a0) ; LMULMAX2-NEXT: ret ; @@ -4163,36 +4443,85 @@ } define void @mulhs_v32i8(<32 x i8>* %x) { -; LMULMAX2-LABEL: mulhs_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a1, zero, 32 -; LMULMAX2-NEXT: vsetvli a1, a1, e8,m2,ta,mu -; LMULMAX2-NEXT: vle8.v v26, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI133_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI133_0) -; LMULMAX2-NEXT: vle8.v v28, (a1) -; LMULMAX2-NEXT: 
lui a1, %hi(.LCPI133_1) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI133_1) -; LMULMAX2-NEXT: vle8.v v30, (a1) -; LMULMAX2-NEXT: vmulhu.vv v26, v26, v28 -; LMULMAX2-NEXT: vsrl.vv v26, v26, v30 -; LMULMAX2-NEXT: vse8.v v26, (a0) -; LMULMAX2-NEXT: ret +; LMULMAX2-RV32-LABEL: mulhs_v32i8: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: addi a1, zero, 32 +; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV32-NEXT: lui a2, 304453 +; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 +; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.i v28, 7 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV32-NEXT: addi a1, zero, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV32-NEXT: addi a1, zero, 57 +; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v30 +; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vse8.v v26, (a0) +; LMULMAX2-RV32-NEXT: ret ; -; LMULMAX1-LABEL: mulhs_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v25, (a1) -; LMULMAX1-NEXT: lui a2, %hi(.LCPI133_0) -; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI133_0) -; LMULMAX1-NEXT: vle8.v v26, (a2) -; LMULMAX1-NEXT: vle8.v v27, (a0) -; LMULMAX1-NEXT: vdivu.vv v25, v25, v26 -; LMULMAX1-NEXT: vdivu.vv v26, v27, v26 -; LMULMAX1-NEXT: vse8.v v26, (a0) -; LMULMAX1-NEXT: vse8.v v25, (a1) -; LMULMAX1-NEXT: ret +; LMULMAX2-RV64-LABEL: mulhs_v32i8: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: addi a1, zero, 32 +; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vle8.v v26, (a0) +; LMULMAX2-RV64-NEXT: lui a2, 304453 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 +; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.i v28, 7 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 1, v0 +; LMULMAX2-RV64-NEXT: addi a1, zero, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v30, a1 +; LMULMAX2-RV64-NEXT: addi a1, zero, 57 +; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30 +; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vse8.v v26, (a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV32-LABEL: mulhs_v32i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a1, a0, 16 +; LMULMAX1-RV32-NEXT: vle8.v v26, (a1) +; LMULMAX1-RV32-NEXT: lui a2, 5 +; LMULMAX1-RV32-NEXT: addi a2, a2, -1452 +; LMULMAX1-RV32-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.i v27, -9 +; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 9, v0 +; LMULMAX1-RV32-NEXT: vdivu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vdivu.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse8.v v26, (a1) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: mulhs_v32i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle8.v v26, (a1) +; LMULMAX1-RV64-NEXT: lui a2, 5 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452 +; LMULMAX1-RV64-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; 
LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vmv.v.i v27, -9 +; LMULMAX1-RV64-NEXT: vmerge.vim v27, v27, 9, v0 +; LMULMAX1-RV64-NEXT: vdivu.vv v26, v26, v27 +; LMULMAX1-RV64-NEXT: vdivu.vv v25, v25, v27 +; LMULMAX1-RV64-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse8.v v26, (a1) +; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = udiv <32 x i8> %a, store <32 x i8> %b, <32 x i8>* %x @@ -4200,33 +4529,66 @@ } define void @mulhs_v16i16(<16 x i16>* %x) { -; LMULMAX2-LABEL: mulhs_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli a1, 16, e16,m2,ta,mu -; LMULMAX2-NEXT: vle16.v v26, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI134_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI134_0) -; LMULMAX2-NEXT: vle16.v v28, (a1) -; LMULMAX2-NEXT: vmulh.vv v26, v26, v28 -; LMULMAX2-NEXT: vsra.vi v26, v26, 1 -; LMULMAX2-NEXT: vsrl.vi v28, v26, 15 -; LMULMAX2-NEXT: vadd.vv v26, v26, v28 -; LMULMAX2-NEXT: vse16.v v26, (a0) -; LMULMAX2-NEXT: ret +; LMULMAX2-RV32-LABEL: mulhs_v16i16: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vle16.v v26, (a0) +; LMULMAX2-RV32-NEXT: lui a1, 7 +; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: lui a1, 5 +; LMULMAX2-RV32-NEXT: addi a1, a1, -1755 +; LMULMAX2-RV32-NEXT: vsetivli a2, 16, e16,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: lui a1, 1048571 +; LMULMAX2-RV32-NEXT: addi a1, a1, 1755 +; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a1, v0 +; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsra.vi v26, v26, 1 +; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 15 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vse16.v v26, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX2-RV64-LABEL: mulhs_v16i16: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vle16.v v26, (a0) +; LMULMAX2-RV64-NEXT: lui a1, 7 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 +; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e16,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, 5 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755 +; LMULMAX2-RV64-NEXT: vsetivli a2, 16, e16,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV64-NEXT: lui a1, 1048571 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755 +; LMULMAX2-RV64-NEXT: vmerge.vxm v28, v28, a1, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vsra.vi v26, v26, 1 +; LMULMAX2-RV64-NEXT: vsrl.vi v28, v26, 15 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vse16.v v26, (a0) +; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhs_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) ; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v25, (a1) -; LMULMAX1-NEXT: lui a2, %hi(.LCPI134_0) -; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI134_0) -; LMULMAX1-NEXT: vle16.v v26, (a2) -; LMULMAX1-NEXT: vle16.v v27, (a0) -; LMULMAX1-NEXT: vdiv.vv v25, v25, v26 -; LMULMAX1-NEXT: vdiv.vv v26, v27, v26 -; LMULMAX1-NEXT: vse16.v v26, (a0) -; LMULMAX1-NEXT: vse16.v v25, (a1) +; LMULMAX1-NEXT: vle16.v v26, (a1) +; LMULMAX1-NEXT: addi a2, zero, 105 +; LMULMAX1-NEXT: vsetivli a3, 1, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.s.x v0, a2 +; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v27, 7 +; LMULMAX1-NEXT: vmerge.vim v27, v27, -7, 
v0 +; LMULMAX1-NEXT: vdiv.vv v26, v26, v27 +; LMULMAX1-NEXT: vdiv.vv v25, v25, v27 +; LMULMAX1-NEXT: vse16.v v25, (a0) +; LMULMAX1-NEXT: vse16.v v26, (a1) ; LMULMAX1-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = sdiv <16 x i16> %a, @@ -4239,9 +4601,16 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-RV32-NEXT: vle32.v v26, (a0) -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI135_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI135_0) -; LMULMAX2-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV32-NEXT: addi a1, zero, 85 +; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: lui a1, 419430 +; LMULMAX2-RV32-NEXT: addi a1, a1, 1639 +; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV32-NEXT: lui a1, 629146 +; LMULMAX2-RV32-NEXT: addi a1, a1, -1639 +; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a1, v0 ; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v28 ; LMULMAX2-RV32-NEXT: vsrl.vi v28, v26, 31 ; LMULMAX2-RV32-NEXT: vsra.vi v26, v26, 1 @@ -4253,9 +4622,16 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-RV64-NEXT: vle32.v v26, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI135_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI135_0) -; LMULMAX2-RV64-NEXT: vle32.v v28, (a1) +; LMULMAX2-RV64-NEXT: addi a1, zero, 85 +; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, 419430 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1639 +; LMULMAX2-RV64-NEXT: vsetivli a2, 8, e32,m2,ta,mu +; LMULMAX2-RV64-NEXT: vmv.v.x v28, a1 +; LMULMAX2-RV64-NEXT: lui a1, 629146 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -1639 +; LMULMAX2-RV64-NEXT: vmerge.vxm v28, v28, a1, v0 ; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v28 ; LMULMAX2-RV64-NEXT: vsra.vi v26, v26, 1 ; LMULMAX2-RV64-NEXT: vsrl.vi v28, v26, 31 @@ -4266,37 +4642,47 @@ ; LMULMAX1-RV32-LABEL: mulhs_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v25, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI135_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI135_0) -; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) -; LMULMAX1-RV32-NEXT: vle32.v v27, (a0) -; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v26 -; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 31 -; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1 -; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v28 -; LMULMAX1-RV32-NEXT: vmulh.vv v26, v27, v26 -; LMULMAX1-RV32-NEXT: vsrl.vi v27, v26, 31 +; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) +; LMULMAX1-RV32-NEXT: addi a2, zero, 5 +; LMULMAX1-RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: lui a2, 419430 +; LMULMAX1-RV32-NEXT: addi a2, a2, 1639 +; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2 +; LMULMAX1-RV32-NEXT: lui a2, 629146 +; LMULMAX1-RV32-NEXT: addi a2, a2, -1639 +; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a2, v0 +; LMULMAX1-RV32-NEXT: vmulh.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v28, v26, 31 ; LMULMAX1-RV32-NEXT: vsra.vi v26, v26, 1 -; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v27 -; LMULMAX1-RV32-NEXT: vse32.v v26, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v25, (a1) +; LMULMAX1-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX1-RV32-NEXT: vmulh.vv v25, v25, v27 +; LMULMAX1-RV32-NEXT: vsrl.vi v27, v25, 31 +; LMULMAX1-RV32-NEXT: vsra.vi v25, v25, 1 +; 
 ; LMULMAX1-RV32-NEXT: vadd.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV32-NEXT: vse32.v v26, (a1)
 ; LMULMAX1-RV32-NEXT: ret
 ;
 ; LMULMAX1-RV64-LABEL: mulhs_v8i32:
 ; LMULMAX1-RV64: # %bb.0:
 ; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle32.v v25, (a1)
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI135_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI135_0)
-; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
-; LMULMAX1-RV64-NEXT: vle32.v v27, (a0)
-; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v26
-; LMULMAX1-RV64-NEXT: vdiv.vv v26, v27, v26
-; LMULMAX1-RV64-NEXT: vse32.v v26, (a0)
-; LMULMAX1-RV64-NEXT: vse32.v v25, (a1)
+; LMULMAX1-RV64-NEXT: vle32.v v26, (a1)
+; LMULMAX1-RV64-NEXT: addi a2, zero, 5
+; LMULMAX1-RV64-NEXT: vsetivli a3, 1, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2
+; LMULMAX1-RV64-NEXT: vsetivli a2, 4, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT: vmv.v.i v27, 5
+; LMULMAX1-RV64-NEXT: vmerge.vim v27, v27, -5, v0
+; LMULMAX1-RV64-NEXT: vdiv.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT: vdiv.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
+; LMULMAX1-RV64-NEXT: vse32.v v26, (a1)
 ; LMULMAX1-RV64-NEXT: ret
 %a = load <8 x i32>, <8 x i32>* %x
 %b = sdiv <8 x i32> %a,
@@ -4309,32 +4695,43 @@
 ; LMULMAX2-RV32: # %bb.0:
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT: vle64.v v26, (a0)
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_0)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_0)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: addi a1, zero, 51
+; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.i v28, -1
+; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT: vmul.vv v28, v26, v28
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_1)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_1)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: addi a1, zero, 17
+; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV32-NEXT: lui a1, 349525
+; LMULMAX2-RV32-NEXT: addi a2, a1, 1365
+; LMULMAX2-RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.x v30, a2
+; LMULMAX2-RV32-NEXT: addi a1, a1, 1366
+; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
 ; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30
 ; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_2)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_2)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v28, (a1)
+; LMULMAX2-RV32-NEXT: addi a1, zero, 85
+; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0
+; LMULMAX2-RV32-NEXT: addi a1, zero, 63
+; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v28, a1, v0
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vsrl.vv v28, v26, v28
-; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI136_3)
-; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI136_3)
-; LMULMAX2-RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vle32.v v30, (a1)
+; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30
+; LMULMAX2-RV32-NEXT: addi a1, zero, 68
+; LMULMAX2-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0
 ; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30
-; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v28
+; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30
 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0)
 ; LMULMAX2-RV32-NEXT: ret
 ;
@@ -4342,20 +4739,30 @@
 ; LMULMAX2-RV64: # %bb.0:
 ; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
 ; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_0)
-; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_0)
-; LMULMAX2-RV64-NEXT: vle64.v v28, (a1)
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_1)
-; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_1)
-; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
+; LMULMAX2-RV64-NEXT: addi a1, zero, 5
+; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
+; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-RV64-NEXT: vmv.v.i v28, -1
+; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 0, v0
 ; LMULMAX2-RV64-NEXT: vmul.vv v28, v26, v28
+; LMULMAX2-RV64-NEXT: lui a1, 21845
+; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
+; LMULMAX2-RV64-NEXT: slli a1, a1, 12
+; LMULMAX2-RV64-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV64-NEXT: slli a1, a1, 12
+; LMULMAX2-RV64-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV64-NEXT: slli a1, a1, 12
+; LMULMAX2-RV64-NEXT: addi a2, a1, 1365
+; LMULMAX2-RV64-NEXT: vmv.v.x v30, a2
+; LMULMAX2-RV64-NEXT: addi a1, a1, 1366
+; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0
 ; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v30
-; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_2)
-; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI136_2)
-; LMULMAX2-RV64-NEXT: vle64.v v30, (a1)
 ; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
 ; LMULMAX2-RV64-NEXT: addi a1, zero, 63
 ; LMULMAX2-RV64-NEXT: vsrl.vx v28, v26, a1
+; LMULMAX2-RV64-NEXT: vmv.v.i v30, 1
+; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 0, v0
 ; LMULMAX2-RV64-NEXT: vsra.vv v26, v26, v30
 ; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
@@ -5304,10 +5711,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI160_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI160_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vadd.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -5556,10 +5965,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI174_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI174_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsub.vv v25, v26, v25
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -5825,10 +6236,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI190_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI190_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vmerge.vim v26, v26, -2, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vand.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -5902,10 +6315,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI194_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI194_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vand.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6075,10 +6490,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI204_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI204_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, -1
+; RV32-NEXT: vmerge.vim v26, v26, -2, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vor.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6152,10 +6569,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI208_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI208_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vor.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6400,10 +6819,12 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI222_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI222_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vxor.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6573,10 +6994,13 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI232_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI232_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: addi a1, zero, 31
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsrl.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6698,10 +7122,13 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI239_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI239_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: addi a1, zero, 31
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsra.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -6823,10 +7250,13 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI246_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI246_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: addi a1, zero, 31
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsll.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -7170,16 +7600,20 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI265_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI265_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: lui a1, 699051
+; RV32-NEXT: addi a2, a1, -1366
+; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v26, a2
+; RV32-NEXT: addi a1, a1, -1365
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vmulhu.vv v25, v25, v26
-; RV32-NEXT: lui a1, %hi(.LCPI265_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI265_1)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: vmerge.vim v26, v26, 1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsrl.vv v25, v25, v26
 ; RV32-NEXT: vse64.v v25, (a0)
@@ -7292,16 +7726,21 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: lui a1, %hi(.LCPI269_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI269_0)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: addi a1, zero, 5
+; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a2, a1, 1365
+; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.x v26, a2
+; RV32-NEXT: addi a1, a1, 1366
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vmulh.vv v25, v25, v26
-; RV32-NEXT: lui a1, %hi(.LCPI269_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI269_1)
-; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
-; RV32-NEXT: vle32.v v26, (a1)
+; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
+; RV32-NEXT: vmv.v.i v26, 0
+; RV32-NEXT: addi a1, zero, 63
+; RV32-NEXT: vmerge.vxm v26, v26, a1, v0
 ; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
 ; RV32-NEXT: vsrl.vv v26, v25, v26
 ; RV32-NEXT: vadd.vv v25, v25, v26
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1035,17 +1035,22 @@
 define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf8 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI49_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i8_v8i64:
@@ -1066,17 +1071,22 @@
 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf8 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI50_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI50_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i8_v8i64:
@@ -1122,17 +1132,22 @@
 define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf4 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI52_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI52_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i16_v8i64:
@@ -1153,17 +1168,22 @@
 define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf4 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI53_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI53_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i16_v8i64:
@@ -1208,17 +1228,22 @@
 define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf2 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI55_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI55_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i32_v8i64:
@@ -1239,17 +1264,22 @@
 define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf2 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI56_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i32_v8i64:
@@ -1270,13 +1300,18 @@
 define <8 x i64> @mgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI57_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI57_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v28, (a1)
+; RV32-NEXT: vmv1r.v v25, v0
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v8, v28
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
@@ -1938,17 +1973,22 @@
 define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf8 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI88_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI88_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i8_v8f64:
@@ -1969,17 +2009,22 @@
 define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf8 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI89_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI89_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i8_v8f64:
@@ -2025,17 +2070,22 @@
 define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf4 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI91_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI91_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i16_v8f64:
@@ -2056,17 +2106,22 @@
 define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf4 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI92_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI92_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i16_v8f64:
@@ -2111,17 +2166,22 @@
 define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_sext_v8i32_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf2 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI94_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI94_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_sext_v8i32_v8f64:
@@ -2142,17 +2202,22 @@
 define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_zext_v8i32_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf2 v28, v8
-; RV32-NEXT: lui a1, %hi(.LCPI95_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI95_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v16, (a1)
-; RV32-NEXT: vmv4r.v v8, v12
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v8, v8, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
-; RV32-NEXT: vsll.vv v28, v28, v16
+; RV32-NEXT: vsll.vv v28, v28, v8
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
-; RV32-NEXT: vloxei64.v v8, (a0), v28, v0.t
+; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
+; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_zext_v8i32_v8f64:
@@ -2173,13 +2238,18 @@
 define <8 x double> @mgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
 ; RV32-LABEL: mgather_baseidx_v8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI96_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI96_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v28, (a1)
+; RV32-NEXT: vmv1r.v v25, v0
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v8, v28
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,tu,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vloxei64.v v12, (a0), v28, v0.t
 ; RV32-NEXT: vmv4r.v v8, v12
 ; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -857,15 +857,20 @@
 define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf8 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI43_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI43_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -886,15 +891,20 @@
 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf8 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI44_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -938,15 +948,20 @@
 define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf4 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI46_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI46_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -967,15 +982,20 @@
 define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf4 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI47_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI47_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1018,15 +1038,20 @@
 define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf2 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI49_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1047,15 +1072,20 @@
 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf2 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI50_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI50_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1076,13 +1106,18 @@
 define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI51_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v28, (a1)
+; RV32-NEXT: vmv1r.v v25, v0
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v12, v28
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1688,15 +1723,20 @@
 define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf8 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI82_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI82_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1717,15 +1757,20 @@
 define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf8 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI83_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI83_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1769,15 +1814,20 @@
 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf4 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI85_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI85_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1798,15 +1848,20 @@
 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf4 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI86_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI86_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1849,15 +1904,20 @@
 define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsext.vf2 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI88_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI88_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1878,15 +1938,20 @@
 define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v25, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vzext.vf2 v28, v12
-; RV32-NEXT: lui a1, %hi(.LCPI89_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI89_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v12, (a1)
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v28, v12
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
@@ -1907,13 +1972,18 @@
 define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m) {
 ; RV32-LABEL: mscatter_baseidx_v8f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI90_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI90_0)
-; RV32-NEXT: vsetivli a2, 16, e32,m4,ta,mu
-; RV32-NEXT: vle32.v v28, (a1)
+; RV32-NEXT: vmv1r.v v25, v0
+; RV32-NEXT: lui a1, 5
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli a2, 1, e16,m1,ta,mu
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vmerge.vim v28, v28, 3, v0
 ; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
 ; RV32-NEXT: vsll.vv v28, v12, v28
 ; RV32-NEXT: vsetivli a1, 4, e64,m4,ta,mu
+; RV32-NEXT: vmv1r.v v0, v25
 ; RV32-NEXT: vsoxei64.v v8, (a0), v28, v0.t
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -699,14 +699,16 @@
 ; RV32MV-NEXT: call __moddi3@plt
 ; RV32MV-NEXT: sw a1, 12(sp)
 ; RV32MV-NEXT: sw a0, 8(sp)
+; RV32MV-NEXT: addi a0, zero, 85
+; RV32MV-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; RV32MV-NEXT: vmv.s.x v0, a0
+; RV32MV-NEXT: vsetivli a0, 8, e32,m2,ta,mu
+; RV32MV-NEXT: vmv.v.i v26, 1
+; RV32MV-NEXT: vle32.v v28, (sp)
 ; RV32MV-NEXT: lui a0, %hi(.LCPI3_0)
 ; RV32MV-NEXT: addi a0, a0, %lo(.LCPI3_0)
-; RV32MV-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; RV32MV-NEXT: vle32.v v26, (a0)
-; RV32MV-NEXT: vle32.v v28, (sp)
-; RV32MV-NEXT: lui a0, %hi(.LCPI3_1)
-; RV32MV-NEXT: addi a0, a0, %lo(.LCPI3_1)
 ; RV32MV-NEXT: vle32.v v30, (a0)
+; RV32MV-NEXT: vmerge.vim v26, v26, -1, v0
 ; RV32MV-NEXT: vand.vv v26, v28, v26
 ; RV32MV-NEXT: vsetivli a0, 4, e64,m2,ta,mu
 ; RV32MV-NEXT: vmsne.vv v0, v26, v30
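
Note (illustrative, not part of the patch): every hunk above replaces a `.LCPI*_*` constant-pool load with the same three-step sequence -- materialize an element-selection mask into v0 with vmv.s.x, splat the dominant value with vmv.v.i/vmv.v.x, then vmerge the second value in under the mask (saving and restoring the original v0 mask via vmv1r.v where the consumer is masked). A hypothetical reduced input of the kind that now takes this path; the function name and the exact <i64 1, i64 63, ...> shift amounts are assumptions chosen for illustration:

define <4 x i64> @two_value_build_vector(<4 x i64> %x) {
  ; The shift amount is a BUILD_VECTOR with only two distinct values,
  ; so instead of a vle64.v from a constant pool it can be lowered as a
  ; splat of 1 merged with 63 under a 0b1010 element mask held in v0.
  %r = shl <4 x i64> %x, <i64 1, i64 63, i64 1, i64 63>
  ret <4 x i64> %r
}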